mirror of
https://github.com/block/goose.git
synced 2026-04-28 03:29:36 +00:00
feat: replace subagent and skills with unified summon extension (#6964)
Signed-off-by: Travis Longwell <travis@block.xyz>
This commit is contained in:
parent
1168d7d9b1
commit
7ea19f5c83
26 changed files with 2616 additions and 1935 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -72,3 +72,4 @@ result
|
|||
|
||||
# Goose self-test artifacts
|
||||
gooseselftest/
|
||||
.tasks/
|
||||
|
|
|
|||
|
|
@ -97,7 +97,10 @@ mod tests {
|
|||
input_config.additional_system_prompt,
|
||||
Some("test_instructions my_value".to_string())
|
||||
);
|
||||
assert!(recipe.extensions.is_none());
|
||||
assert!(recipe
|
||||
.extensions
|
||||
.as_ref()
|
||||
.is_none_or(|e| e.iter().all(|ext| ext.name() == "summon")));
|
||||
|
||||
assert!(settings.is_some());
|
||||
let settings = settings.unwrap();
|
||||
|
|
@ -162,7 +165,10 @@ mod tests {
|
|||
input_config.additional_system_prompt,
|
||||
Some("test_instructions my_value".to_string())
|
||||
);
|
||||
assert!(recipe.extensions.is_none());
|
||||
assert!(recipe
|
||||
.extensions
|
||||
.as_ref()
|
||||
.is_none_or(|e| e.iter().all(|ext| ext.name() == "summon")));
|
||||
|
||||
assert!(settings.is_some());
|
||||
let settings = settings.unwrap();
|
||||
|
|
|
|||
|
|
@ -600,11 +600,7 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession {
|
|||
let recipe = session_config.recipe.as_ref();
|
||||
|
||||
agent
|
||||
.apply_recipe_components(
|
||||
recipe.and_then(|r| r.sub_recipes.clone()),
|
||||
recipe.and_then(|r| r.response.clone()),
|
||||
true,
|
||||
)
|
||||
.apply_recipe_components(recipe.and_then(|r| r.response.clone()), true)
|
||||
.await;
|
||||
|
||||
let new_provider = match create(&resolved.provider_name, resolved.model_config).await {
|
||||
|
|
@ -643,6 +639,17 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession {
|
|||
process::exit(1);
|
||||
});
|
||||
|
||||
if let Some(recipe) = session_config.recipe.clone() {
|
||||
if let Err(e) = session_manager
|
||||
.update(&session_id)
|
||||
.recipe(Some(recipe))
|
||||
.apply()
|
||||
.await
|
||||
{
|
||||
tracing::warn!("Failed to store recipe on session: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
if session_config.resume {
|
||||
handle_resumed_session_workdir(&agent, &session_id, session_config.interactive).await;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,8 +21,8 @@ use tokio_util::task::AbortOnDropHandle;
|
|||
pub use self::export::message_to_markdown;
|
||||
pub use builder::{build_session, SessionBuilderConfig};
|
||||
use console::Color;
|
||||
use goose::agents::subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE;
|
||||
use goose::agents::AgentEvent;
|
||||
use goose::agents::SUBAGENT_TOOL_REQUEST_TYPE;
|
||||
use goose::permission::permission_confirmation::PrincipalType;
|
||||
use goose::permission::Permission;
|
||||
use goose::permission::PermissionConfirmation;
|
||||
|
|
|
|||
|
|
@ -314,8 +314,9 @@ fn render_tool_request(req: &ToolRequest, theme: Theme, debug: bool) {
|
|||
Ok(call) => match call.name.to_string().as_str() {
|
||||
"developer__text_editor" => render_text_editor_request(call, debug),
|
||||
"developer__shell" => render_shell_request(call, debug),
|
||||
"code_execution__execute" => render_execute_code_request(call, debug),
|
||||
"subagent" => render_subagent_request(call, debug),
|
||||
"execute" | "execute_code" => render_execute_code_request(call, debug),
|
||||
"delegate" => render_delegate_request(call, debug),
|
||||
"subagent" => render_delegate_request(call, debug),
|
||||
"todo__write" => render_todo_request(call, debug),
|
||||
_ => render_default_request(call, debug),
|
||||
},
|
||||
|
|
@ -555,12 +556,12 @@ fn render_execute_code_request(call: &CallToolRequestParams, debug: bool) {
|
|||
println!();
|
||||
}
|
||||
|
||||
fn render_subagent_request(call: &CallToolRequestParams, debug: bool) {
|
||||
fn render_delegate_request(call: &CallToolRequestParams, debug: bool) {
|
||||
print_tool_header(call);
|
||||
|
||||
if let Some(args) = &call.arguments {
|
||||
if let Some(Value::String(subrecipe)) = args.get("subrecipe") {
|
||||
println!("{}: {}", style("subrecipe").dim(), style(subrecipe).cyan());
|
||||
if let Some(Value::String(source)) = args.get("source") {
|
||||
println!("{}: {}", style("source").dim(), style(source).cyan());
|
||||
}
|
||||
|
||||
if let Some(Value::String(instructions)) = args.get("instructions") {
|
||||
|
|
@ -581,7 +582,7 @@ fn render_subagent_request(call: &CallToolRequestParams, debug: bool) {
|
|||
print_params(&Some(params.clone()), 1, debug);
|
||||
}
|
||||
|
||||
let skip_keys = ["subrecipe", "instructions", "parameters"];
|
||||
let skip_keys = ["source", "instructions", "parameters"];
|
||||
let mut other_args = serde_json::Map::new();
|
||||
for (k, v) in args {
|
||||
if !skip_keys.contains(&k.as_str()) {
|
||||
|
|
|
|||
|
|
@ -163,11 +163,7 @@ pub async fn apply_recipe_to_agent(
|
|||
include_final_output_tool: bool,
|
||||
) -> Option<String> {
|
||||
agent
|
||||
.apply_recipe_components(
|
||||
recipe.sub_recipes.clone(),
|
||||
recipe.response.clone(),
|
||||
include_final_output_tool,
|
||||
)
|
||||
.apply_recipe_components(recipe.response.clone(), include_final_output_tool)
|
||||
.await;
|
||||
|
||||
recipe.instructions.as_ref().map(|instructions| {
|
||||
|
|
|
|||
|
|
@ -20,10 +20,6 @@ use crate::agents::final_output_tool::{FINAL_OUTPUT_CONTINUATION_MESSAGE, FINAL_
|
|||
use crate::agents::platform_tools::PLATFORM_MANAGE_SCHEDULE_TOOL_NAME;
|
||||
use crate::agents::prompt_manager::PromptManager;
|
||||
use crate::agents::retry::{RetryManager, RetryResult};
|
||||
use crate::agents::subagent_task_config::TaskConfig;
|
||||
use crate::agents::subagent_tool::{
|
||||
create_subagent_tool, handle_subagent_tool, SUBAGENT_TOOL_NAME,
|
||||
};
|
||||
use crate::agents::types::{FrontendTool, SessionConfig, SharedProvider, ToolResultReceiver};
|
||||
use crate::config::permission::PermissionManager;
|
||||
use crate::config::{get_enabled_extensions, Config, GooseMode};
|
||||
|
|
@ -42,11 +38,11 @@ use crate::permission::permission_judge::PermissionCheckResult;
|
|||
use crate::permission::PermissionConfirmation;
|
||||
use crate::providers::base::Provider;
|
||||
use crate::providers::errors::ProviderError;
|
||||
use crate::recipe::{Author, Recipe, Response, Settings, SubRecipe};
|
||||
use crate::recipe::{Author, Recipe, Response, Settings};
|
||||
use crate::scheduler_trait::SchedulerTrait;
|
||||
use crate::security::security_inspector::SecurityInspector;
|
||||
use crate::session::extension_data::{EnabledExtensionsState, ExtensionState};
|
||||
use crate::session::{Session, SessionManager, SessionType};
|
||||
use crate::session::{Session, SessionManager};
|
||||
use crate::tool_inspection::ToolInspectionManager;
|
||||
use crate::tool_monitor::RepetitionInspector;
|
||||
use crate::utils::is_token_cancelled;
|
||||
|
|
@ -121,7 +117,6 @@ pub struct Agent {
|
|||
pub config: AgentConfig,
|
||||
|
||||
pub extension_manager: Arc<ExtensionManager>,
|
||||
pub(super) sub_recipes: Mutex<HashMap<String, SubRecipe>>,
|
||||
pub(super) final_output_tool: Arc<Mutex<Option<FinalOutputTool>>>,
|
||||
pub(super) frontend_tools: Mutex<HashMap<String, FrontendTool>>,
|
||||
pub(super) frontend_instructions: Mutex<Option<String>>,
|
||||
|
|
@ -210,7 +205,6 @@ impl Agent {
|
|||
provider: provider.clone(),
|
||||
config,
|
||||
extension_manager: Arc::new(ExtensionManager::new(provider.clone(), session_manager)),
|
||||
sub_recipes: Mutex::new(HashMap::new()),
|
||||
final_output_tool: Arc::new(Mutex::new(None)),
|
||||
frontend_tools: Mutex::new(HashMap::new()),
|
||||
frontend_instructions: Mutex::new(None),
|
||||
|
|
@ -452,23 +446,11 @@ impl Agent {
|
|||
self.extend_system_prompt(final_output_system_prompt).await;
|
||||
}
|
||||
|
||||
pub async fn add_sub_recipes(&self, sub_recipes_to_add: Vec<SubRecipe>) {
|
||||
let mut sub_recipes = self.sub_recipes.lock().await;
|
||||
for sr in sub_recipes_to_add {
|
||||
sub_recipes.insert(sr.name.clone(), sr);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn apply_recipe_components(
|
||||
&self,
|
||||
sub_recipes: Option<Vec<SubRecipe>>,
|
||||
response: Option<Response>,
|
||||
include_final_output: bool,
|
||||
) {
|
||||
if let Some(sub_recipes) = sub_recipes {
|
||||
self.add_sub_recipes(sub_recipes).await;
|
||||
}
|
||||
|
||||
if include_final_output {
|
||||
if let Some(response) = response {
|
||||
self.add_final_output_tool(response).await;
|
||||
|
|
@ -485,18 +467,6 @@ impl Agent {
|
|||
cancellation_token: Option<CancellationToken>,
|
||||
session: &Session,
|
||||
) -> (String, Result<ToolCallResult, ErrorData>) {
|
||||
// Prevent subagents from creating other subagents
|
||||
if session.session_type == SessionType::SubAgent && tool_call.name == SUBAGENT_TOOL_NAME {
|
||||
return (
|
||||
request_id,
|
||||
Err(ErrorData::new(
|
||||
ErrorCode::INVALID_REQUEST,
|
||||
"Subagents cannot create other subagents".to_string(),
|
||||
None,
|
||||
)),
|
||||
);
|
||||
}
|
||||
|
||||
if tool_call.name == PLATFORM_MANAGE_SCHEDULE_TOOL_NAME {
|
||||
let arguments = tool_call
|
||||
.arguments
|
||||
|
|
@ -531,49 +501,7 @@ impl Agent {
|
|||
}
|
||||
|
||||
debug!("WAITING_TOOL_START: {}", tool_call.name);
|
||||
let result: ToolCallResult = if tool_call.name == SUBAGENT_TOOL_NAME {
|
||||
let provider = match self.provider().await {
|
||||
Ok(p) => p,
|
||||
Err(_) => {
|
||||
return (
|
||||
request_id,
|
||||
Err(ErrorData::new(
|
||||
ErrorCode::INTERNAL_ERROR,
|
||||
"Provider is required".to_string(),
|
||||
None,
|
||||
)),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let extensions = self.get_extension_configs().await;
|
||||
|
||||
let max_turns_from_recipe = session
|
||||
.recipe
|
||||
.as_ref()
|
||||
.and_then(|r| r.settings.as_ref())
|
||||
.and_then(|s| s.max_turns);
|
||||
|
||||
let task_config =
|
||||
TaskConfig::new(provider, &session.id, &session.working_dir, extensions)
|
||||
.with_max_turns(max_turns_from_recipe);
|
||||
let sub_recipes = self.sub_recipes.lock().await.clone();
|
||||
|
||||
let arguments = tool_call
|
||||
.arguments
|
||||
.clone()
|
||||
.map(Value::Object)
|
||||
.unwrap_or(Value::Object(serde_json::Map::new()));
|
||||
|
||||
handle_subagent_tool(
|
||||
&self.config,
|
||||
arguments,
|
||||
task_config,
|
||||
sub_recipes,
|
||||
session.working_dir.clone(),
|
||||
cancellation_token,
|
||||
)
|
||||
} else if self.is_frontend_tool(&tool_call.name).await {
|
||||
let result: ToolCallResult = if self.is_frontend_tool(&tool_call.name).await {
|
||||
// For frontend tools, return an error indicating we need frontend execution
|
||||
ToolCallResult::from(Err(ErrorData::new(
|
||||
ErrorCode::INTERNAL_ERROR,
|
||||
|
|
@ -807,30 +735,6 @@ impl Agent {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn subagents_enabled(&self, session_id: &str) -> bool {
|
||||
if self.config.goose_mode != GooseMode::Auto {
|
||||
return false;
|
||||
}
|
||||
let context = self.extension_manager.get_context();
|
||||
if matches!(
|
||||
context
|
||||
.session_manager
|
||||
.get_session(session_id, false)
|
||||
.await
|
||||
.ok()
|
||||
.map(|session| session.session_type),
|
||||
Some(SessionType::SubAgent)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
!self
|
||||
.extension_manager
|
||||
.list_extensions()
|
||||
.await
|
||||
.map(|ext| ext.is_empty())
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
pub async fn list_tools(&self, session_id: &str, extension_name: Option<String>) -> Vec<Tool> {
|
||||
let mut prefixed_tools = self
|
||||
.extension_manager
|
||||
|
|
@ -838,7 +742,6 @@ impl Agent {
|
|||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let subagents_enabled = self.subagents_enabled(session_id).await;
|
||||
if (extension_name.is_none() || extension_name.as_deref() == Some("platform"))
|
||||
&& self.config.scheduler_service.is_some()
|
||||
{
|
||||
|
|
@ -849,12 +752,6 @@ impl Agent {
|
|||
if let Some(final_output_tool) = self.final_output_tool.lock().await.as_ref() {
|
||||
prefixed_tools.push(final_output_tool.tool());
|
||||
}
|
||||
|
||||
if subagents_enabled {
|
||||
let sub_recipes = self.sub_recipes.lock().await;
|
||||
let sub_recipes_vec: Vec<_> = sub_recipes.values().cloned().collect();
|
||||
prefixed_tools.push(create_subagent_tool(&sub_recipes_vec));
|
||||
}
|
||||
}
|
||||
|
||||
prefixed_tools
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use include_dir::{include_dir, Dir};
|
|||
static BUILTIN_SKILLS_DIR: Dir =
|
||||
include_dir!("$CARGO_MANIFEST_DIR/src/agents/builtin_skills/skills");
|
||||
|
||||
pub fn get_all_builtin_skills() -> Vec<&'static str> {
|
||||
pub fn get_all() -> Vec<&'static str> {
|
||||
BUILTIN_SKILLS_DIR
|
||||
.files()
|
||||
.filter(|f| f.path().extension().is_some_and(|ext| ext == "md"))
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use crate::agents::extension::PlatformExtensionContext;
|
||||
use crate::agents::extension_manager::get_tool_owner;
|
||||
use crate::agents::mcp_client::{Error, McpClientTrait};
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
|
@ -111,10 +112,16 @@ impl CodeExecutionClient {
|
|||
let mut cfgs = vec![];
|
||||
for tool in tools {
|
||||
let full_name = tool.name.to_string();
|
||||
let (server_name, tool_name) = full_name.split_once("__")?;
|
||||
let (namespace, name) = if let Some((server, tool_name)) = full_name.split_once("__") {
|
||||
(server.to_string(), tool_name.to_string())
|
||||
} else if let Some(owner) = get_tool_owner(&tool) {
|
||||
(owner, full_name)
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
cfgs.push(CallbackConfig {
|
||||
name: tool_name.into(),
|
||||
namespace: server_name.into(),
|
||||
name,
|
||||
namespace,
|
||||
description: tool.description.as_ref().map(|d| d.to_string()),
|
||||
input_schema: Some(json!(tool.input_schema)),
|
||||
output_schema: tool.output_schema.as_ref().map(|s| json!(s)),
|
||||
|
|
|
|||
|
|
@ -378,7 +378,7 @@ impl Agent {
|
|||
Err(e) => return Err(anyhow!("Failed to build recipe: {}", e)),
|
||||
};
|
||||
|
||||
self.apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true)
|
||||
self.apply_recipe_components(recipe.response.clone(), true)
|
||||
.await;
|
||||
|
||||
let prompt = [recipe.instructions.as_deref(), recipe.prompt.as_deref()]
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use crate::agents::apps_extension;
|
|||
use crate::agents::chatrecall_extension;
|
||||
use crate::agents::code_execution_extension;
|
||||
use crate::agents::extension_manager_extension;
|
||||
use crate::agents::skills_extension;
|
||||
use crate::agents::summon_extension;
|
||||
use crate::agents::todo_extension;
|
||||
use crate::agents::tom_extension;
|
||||
use std::collections::HashMap;
|
||||
|
|
@ -53,6 +53,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
|
|||
description:
|
||||
"Enable a todo list for goose so it can keep track of what it is doing",
|
||||
default_enabled: true,
|
||||
unprefixed_tools: false,
|
||||
client_factory: |ctx| Box::new(todo_extension::TodoClient::new(ctx).unwrap()),
|
||||
},
|
||||
);
|
||||
|
|
@ -65,6 +66,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
|
|||
description:
|
||||
"Create and manage custom Goose apps through chat. Apps are HTML/CSS/JavaScript and run in sandboxed windows.",
|
||||
default_enabled: true,
|
||||
unprefixed_tools: false,
|
||||
client_factory: |ctx| Box::new(apps_extension::AppsManagerClient::new(ctx).unwrap()),
|
||||
},
|
||||
);
|
||||
|
|
@ -77,6 +79,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
|
|||
description:
|
||||
"Search past conversations and load session summaries for contextual memory",
|
||||
default_enabled: false,
|
||||
unprefixed_tools: false,
|
||||
client_factory: |ctx| {
|
||||
Box::new(chatrecall_extension::ChatRecallClient::new(ctx).unwrap())
|
||||
},
|
||||
|
|
@ -91,18 +94,20 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
|
|||
description:
|
||||
"Enable extension management tools for discovering, enabling, and disabling extensions",
|
||||
default_enabled: true,
|
||||
unprefixed_tools: false,
|
||||
client_factory: |ctx| Box::new(extension_manager_extension::ExtensionManagerClient::new(ctx).unwrap()),
|
||||
},
|
||||
);
|
||||
|
||||
map.insert(
|
||||
skills_extension::EXTENSION_NAME,
|
||||
summon_extension::EXTENSION_NAME,
|
||||
PlatformExtensionDef {
|
||||
name: skills_extension::EXTENSION_NAME,
|
||||
display_name: "Skills",
|
||||
description: "Load and use skills from relevant directories",
|
||||
name: summon_extension::EXTENSION_NAME,
|
||||
display_name: "Summon",
|
||||
description: "Load knowledge and delegate tasks to subagents",
|
||||
default_enabled: true,
|
||||
client_factory: |ctx| Box::new(skills_extension::SkillsClient::new(ctx).unwrap()),
|
||||
unprefixed_tools: true,
|
||||
client_factory: |ctx| Box::new(summon_extension::SummonClient::new(ctx).unwrap()),
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -114,6 +119,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
|
|||
description:
|
||||
"Goose will make extension calls through code execution, saving tokens",
|
||||
default_enabled: false,
|
||||
unprefixed_tools: true,
|
||||
client_factory: |ctx| {
|
||||
Box::new(code_execution_extension::CodeExecutionClient::new(ctx).unwrap())
|
||||
},
|
||||
|
|
@ -128,6 +134,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
|
|||
description:
|
||||
"Inject custom context into every turn via GOOSE_MOIM_MESSAGE_TEXT and GOOSE_MOIM_MESSAGE_FILE environment variables",
|
||||
default_enabled: true,
|
||||
unprefixed_tools: false,
|
||||
client_factory: |ctx| Box::new(tom_extension::TomClient::new(ctx).unwrap()),
|
||||
},
|
||||
);
|
||||
|
|
@ -175,12 +182,15 @@ impl PlatformExtensionContext {
|
|||
}
|
||||
}
|
||||
|
||||
/// Definition for a platform extension that runs in-process with direct agent access.
///
/// Instances are stored in the `PLATFORM_EXTENSIONS` registry, one entry per extension.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PlatformExtensionDef {
|
||||
/// Identifier of the extension; the registry entries visible in this file insert each
/// definition under the same `EXTENSION_NAME` constant they store here.
pub name: &'static str,
|
||||
/// Human-readable name of the extension (for example "Summon").
pub display_name: &'static str,
|
||||
/// Short human-readable description of what the extension provides.
pub description: &'static str,
|
||||
/// NOTE(review): presumably whether the extension is turned on when the user has not
/// configured it explicitly; confirm against the code that reads this flag.
pub default_enabled: bool,
|
||||
/// If true, tools are exposed without extension prefix for intuitive first-class use.
|
||||
pub unprefixed_tools: bool,
|
||||
/// Builds the extension's client from the platform context.
pub client_factory: fn(PlatformExtensionContext) -> Box<dyn McpClientTrait>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -105,18 +105,18 @@ pub struct ExtensionManager {
|
|||
/// A flattened representation of a resource used by the agent to prepare inference
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ResourceItem {
|
||||
pub client_name: String, // The name of the client that owns the resource
|
||||
pub uri: String, // The URI of the resource
|
||||
pub name: String, // The name of the resource
|
||||
pub content: String, // The content of the resource
|
||||
pub extension_name: String, // The name of the extension that owns the resource
|
||||
pub uri: String, // The URI of the resource
|
||||
pub name: String, // The name of the resource
|
||||
pub content: String, // The content of the resource
|
||||
pub timestamp: DateTime<Utc>, // The timestamp of the resource
|
||||
pub priority: f32, // The priority of the resource
|
||||
pub priority: f32, // The priority of the resource
|
||||
pub token_count: Option<u32>, // The token count of the resource (filled in by the agent)
|
||||
}
|
||||
|
||||
impl ResourceItem {
|
||||
pub fn new(
|
||||
client_name: String,
|
||||
extension_name: String,
|
||||
uri: String,
|
||||
name: String,
|
||||
content: String,
|
||||
|
|
@ -124,7 +124,7 @@ impl ResourceItem {
|
|||
priority: f32,
|
||||
) -> Self {
|
||||
Self {
|
||||
client_name,
|
||||
extension_name,
|
||||
uri,
|
||||
name,
|
||||
content,
|
||||
|
|
@ -199,6 +199,41 @@ pub fn get_parameter_names(tool: &Tool) -> Vec<String> {
|
|||
names
|
||||
}
|
||||
|
||||
const TOOL_EXTENSION_META_KEY: &str = "goose_extension";
|
||||
|
||||
pub fn get_tool_owner(tool: &Tool) -> Option<String> {
|
||||
tool.meta
|
||||
.as_ref()
|
||||
.and_then(|m| m.0.get(TOOL_EXTENSION_META_KEY))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
fn is_unprefixed_extension(config: &ExtensionConfig) -> bool {
|
||||
if let ExtensionConfig::Platform { name, .. } = config {
|
||||
PLATFORM_EXTENSIONS
|
||||
.get(name_to_key(name).as_str())
|
||||
.is_some_and(|def| def.unprefixed_tools)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the named extension is a first-class platform extension
|
||||
/// whose tools are exposed unprefixed and remain visible during code execution mode.
|
||||
pub fn is_first_class_extension(name: &str) -> bool {
|
||||
PLATFORM_EXTENSIONS
|
||||
.get(name_to_key(name).as_str())
|
||||
.is_some_and(|def| def.unprefixed_tools)
|
||||
}
|
||||
|
||||
/// Result of resolving a tool call to its owning extension
|
||||
struct ResolvedTool {
|
||||
/// Name of the extension that owns the tool; the dispatcher uses it to look the
/// extension up and check that the tool is available.
extension_name: String,
|
||||
/// Tool name passed to the client's `call_tool`, with the `<extension>__` prefix
/// removed when the requested name carried one.
actual_tool_name: String,
|
||||
/// Client of the owning extension, used to perform the call.
client: McpClientBox,
|
||||
}
|
||||
|
||||
async fn child_process_client(
|
||||
mut command: Command,
|
||||
timeout: &Option<u64>,
|
||||
|
|
@ -789,16 +824,18 @@ impl ExtensionManager {
|
|||
tools
|
||||
.iter()
|
||||
.filter(|tool| {
|
||||
let tool_prefix = tool.name.split("__").next().unwrap_or("");
|
||||
let tool_owner = get_tool_owner(tool)
|
||||
.map(|s| name_to_key(&s))
|
||||
.unwrap_or_else(|| tool.name.split("__").next().unwrap_or("").to_string());
|
||||
|
||||
if let Some(ref excluded) = exclude_normalized {
|
||||
if tool_prefix == excluded {
|
||||
if tool_owner == *excluded {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref name_filter) = extension_name_normalized {
|
||||
tool_prefix == name_filter
|
||||
tool_owner == *name_filter
|
||||
} else {
|
||||
true
|
||||
}
|
||||
|
|
@ -861,18 +898,36 @@ impl ExtensionManager {
|
|||
}
|
||||
};
|
||||
|
||||
let expose_unprefixed = is_unprefixed_extension(&config);
|
||||
|
||||
loop {
|
||||
for tool in client_tools.tools {
|
||||
if config.is_tool_available(&tool.name) {
|
||||
let public_name = if expose_unprefixed {
|
||||
tool.name.to_string()
|
||||
} else {
|
||||
format!("{}__{}", name, tool.name)
|
||||
};
|
||||
|
||||
let mut meta_map = tool
|
||||
.meta
|
||||
.as_ref()
|
||||
.map(|m| m.0.clone())
|
||||
.unwrap_or_default();
|
||||
meta_map.insert(
|
||||
TOOL_EXTENSION_META_KEY.to_string(),
|
||||
serde_json::Value::String(name.clone()),
|
||||
);
|
||||
|
||||
tools.push(Tool {
|
||||
name: format!("{}__{}", name, tool.name).into(),
|
||||
name: public_name.into(),
|
||||
description: tool.description,
|
||||
input_schema: tool.input_schema,
|
||||
annotations: tool.annotations,
|
||||
output_schema: tool.output_schema,
|
||||
icons: tool.icons,
|
||||
title: tool.title,
|
||||
meta: tool.meta,
|
||||
meta: Some(rmcp::model::Meta(meta_map)),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -899,9 +954,22 @@ impl ExtensionManager {
|
|||
|
||||
let results = future::join_all(client_futures).await;
|
||||
|
||||
let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
let mut tools = Vec::new();
|
||||
for (_, client_tools) in results {
|
||||
tools.extend(client_tools);
|
||||
for (ext_name, client_tools) in results {
|
||||
for tool in client_tools {
|
||||
let tool_name = tool.name.to_string();
|
||||
if seen_names.contains(&tool_name) {
|
||||
warn!(
|
||||
tool = %tool_name,
|
||||
extension = %ext_name,
|
||||
"Duplicate tool name - skipping"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
seen_names.insert(tool_name);
|
||||
tools.push(tool);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tools)
|
||||
|
|
@ -915,16 +983,6 @@ impl ExtensionManager {
|
|||
prompt_template::render_template("plan.md", &context).expect("Prompt should render")
|
||||
}
|
||||
|
||||
/// Find and return a reference to the appropriate client for a tool call
|
||||
async fn get_client_for_tool(&self, prefixed_name: &str) -> Option<(String, McpClientBox)> {
|
||||
self.extensions
|
||||
.lock()
|
||||
.await
|
||||
.iter()
|
||||
.find(|(key, _)| prefixed_name.starts_with(*key))
|
||||
.map(|(name, extension)| (name.clone(), extension.get_client()))
|
||||
}
|
||||
|
||||
// Function that gets executed for read_resource tool
|
||||
pub async fn read_resource_tool(
|
||||
&self,
|
||||
|
|
@ -1183,6 +1241,66 @@ impl ExtensionManager {
|
|||
}
|
||||
}
|
||||
|
||||
async fn resolve_tool(
|
||||
&self,
|
||||
session_id: &str,
|
||||
tool_name: &str,
|
||||
) -> Result<ResolvedTool, ErrorData> {
|
||||
if let Some((prefix, actual)) = tool_name.split_once("__") {
|
||||
let owner = name_to_key(prefix);
|
||||
if let Some(client) = self.get_server_client(&owner).await {
|
||||
return Ok(ResolvedTool {
|
||||
extension_name: owner,
|
||||
actual_tool_name: actual.to_string(),
|
||||
client,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let tools = self.get_all_tools_cached(session_id).await.map_err(|e| {
|
||||
ErrorData::new(
|
||||
ErrorCode::INTERNAL_ERROR,
|
||||
format!("Failed to get tools: {}", e),
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
|
||||
if let Some(tool) = tools.iter().find(|t| *t.name == *tool_name) {
|
||||
let owner = get_tool_owner(tool).ok_or_else(|| {
|
||||
ErrorData::new(
|
||||
ErrorCode::RESOURCE_NOT_FOUND,
|
||||
format!("Tool '{}' has no owner", tool_name),
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
|
||||
let actual_tool_name = tool_name
|
||||
.strip_prefix(&format!("{owner}__"))
|
||||
.unwrap_or(tool_name)
|
||||
.to_string();
|
||||
|
||||
let client = self.get_server_client(&owner).await.ok_or_else(|| {
|
||||
ErrorData::new(
|
||||
ErrorCode::RESOURCE_NOT_FOUND,
|
||||
format!("Extension '{}' not found for tool '{}'", owner, tool_name),
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
|
||||
return Ok(ResolvedTool {
|
||||
extension_name: owner,
|
||||
actual_tool_name,
|
||||
client,
|
||||
});
|
||||
}
|
||||
|
||||
Err(ErrorData::new(
|
||||
ErrorCode::RESOURCE_NOT_FOUND,
|
||||
format!("Tool '{}' not found", tool_name),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn dispatch_tool_call(
|
||||
&self,
|
||||
session_id: &str,
|
||||
|
|
@ -1190,52 +1308,19 @@ impl ExtensionManager {
|
|||
working_dir: Option<&std::path::Path>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Result<ToolCallResult> {
|
||||
// Some models strip the tool prefix, so auto-add it for known code_execution tools
|
||||
let tool_name_str = tool_call.name.to_string();
|
||||
let prefixed_name = if !tool_name_str.contains("__") {
|
||||
let code_exec_tools = ["execute", "list_functions", "get_function_details"];
|
||||
if code_exec_tools.contains(&tool_name_str.as_str())
|
||||
&& self.extensions.lock().await.contains_key("code_execution")
|
||||
let resolved = self.resolve_tool(session_id, &tool_name_str).await?;
|
||||
|
||||
if let Some(extension) = self.extensions.lock().await.get(&resolved.extension_name) {
|
||||
if !extension
|
||||
.config
|
||||
.is_tool_available(&resolved.actual_tool_name)
|
||||
{
|
||||
format!("code_execution__{tool_name_str}")
|
||||
} else {
|
||||
tool_name_str
|
||||
}
|
||||
} else {
|
||||
tool_name_str
|
||||
};
|
||||
|
||||
// Dispatch tool call based on the prefix naming convention
|
||||
let (client_name, client) =
|
||||
self.get_client_for_tool(&prefixed_name)
|
||||
.await
|
||||
.ok_or_else(|| {
|
||||
ErrorData::new(
|
||||
ErrorCode::RESOURCE_NOT_FOUND,
|
||||
format!("Tool '{}' not found", tool_call.name),
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
|
||||
let tool_name = prefixed_name
|
||||
.strip_prefix(client_name.as_str())
|
||||
.and_then(|s| s.strip_prefix("__"))
|
||||
.ok_or_else(|| {
|
||||
ErrorData::new(
|
||||
ErrorCode::RESOURCE_NOT_FOUND,
|
||||
format!("Invalid tool name format: '{}'", tool_call.name),
|
||||
None,
|
||||
)
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
if let Some(extension) = self.extensions.lock().await.get(&client_name) {
|
||||
if !extension.config.is_tool_available(&tool_name) {
|
||||
return Err(ErrorData::new(
|
||||
ErrorCode::RESOURCE_NOT_FOUND,
|
||||
format!(
|
||||
"Tool '{}' is not available for extension '{}'",
|
||||
tool_name, client_name
|
||||
resolved.actual_tool_name, resolved.extension_name
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
|
@ -1244,15 +1329,16 @@ impl ExtensionManager {
|
|||
}
|
||||
|
||||
let arguments = tool_call.arguments.clone();
|
||||
let client = client.clone();
|
||||
let client = resolved.client.clone();
|
||||
let notifications_receiver = client.lock().await.subscribe().await;
|
||||
let session_id = session_id.to_string();
|
||||
let actual_tool_name = resolved.actual_tool_name;
|
||||
let working_dir_str = working_dir.map(|p| p.to_string_lossy().to_string());
|
||||
|
||||
let fut = async move {
|
||||
tracing::debug!(
|
||||
"dispatch_tool_call fut: calling client.call_tool tool={} session_id={} working_dir={:?}",
|
||||
tool_name,
|
||||
"dispatch_tool_call: calling client.call_tool tool={} session_id={} working_dir={:?}",
|
||||
actual_tool_name,
|
||||
session_id,
|
||||
working_dir_str
|
||||
);
|
||||
|
|
@ -1260,7 +1346,7 @@ impl ExtensionManager {
|
|||
client_guard
|
||||
.call_tool(
|
||||
&session_id,
|
||||
&tool_name,
|
||||
&actual_tool_name,
|
||||
arguments,
|
||||
working_dir_str.as_deref(),
|
||||
cancellation_token,
|
||||
|
|
@ -1636,70 +1722,8 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_client_for_tool() {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let extension_manager =
|
||||
ExtensionManager::new_without_provider(temp_dir.path().to_path_buf());
|
||||
|
||||
// Add some mock clients using the helper method
|
||||
extension_manager
|
||||
.add_mock_extension(
|
||||
"test_client".to_string(),
|
||||
Arc::new(Mutex::new(Box::new(MockClient {}))),
|
||||
)
|
||||
.await;
|
||||
|
||||
extension_manager
|
||||
.add_mock_extension(
|
||||
"__client".to_string(),
|
||||
Arc::new(Mutex::new(Box::new(MockClient {}))),
|
||||
)
|
||||
.await;
|
||||
|
||||
extension_manager
|
||||
.add_mock_extension(
|
||||
"__cli__ent__".to_string(),
|
||||
Arc::new(Mutex::new(Box::new(MockClient {}))),
|
||||
)
|
||||
.await;
|
||||
|
||||
extension_manager
|
||||
.add_mock_extension(
|
||||
"client 🚀".to_string(),
|
||||
Arc::new(Mutex::new(Box::new(MockClient {}))),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Test basic case
|
||||
assert!(extension_manager
|
||||
.get_client_for_tool("test_client__tool")
|
||||
.await
|
||||
.is_some());
|
||||
|
||||
// Test leading underscores
|
||||
assert!(extension_manager
|
||||
.get_client_for_tool("__client__tool")
|
||||
.await
|
||||
.is_some());
|
||||
|
||||
// Test multiple underscores in client name, and ending with __
|
||||
assert!(extension_manager
|
||||
.get_client_for_tool("__cli__ent____tool")
|
||||
.await
|
||||
.is_some());
|
||||
|
||||
// Test unicode in tool name, "client 🚀" should become "client_"
|
||||
assert!(extension_manager
|
||||
.get_client_for_tool("client___tool")
|
||||
.await
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_dispatch_tool_call() {
|
||||
// test that dispatch_tool_call parses out the sanitized name correctly, and extracts
|
||||
// tool_names
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let extension_manager =
|
||||
ExtensionManager::new_without_provider(temp_dir.path().to_path_buf());
|
||||
|
|
@ -1726,7 +1750,6 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
// verify a normal tool call
|
||||
let tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
|
|
@ -1747,7 +1770,7 @@ mod tests {
|
|||
let tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
name: "test_client__test__tool".to_string().into(),
|
||||
name: "test_client__available_tool".to_string().into(),
|
||||
arguments: Some(object!({})),
|
||||
};
|
||||
|
||||
|
|
@ -1761,7 +1784,6 @@ mod tests {
|
|||
.await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
// verify a multiple underscores dispatch
|
||||
let tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
|
|
@ -1779,7 +1801,6 @@ mod tests {
|
|||
.await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
// Test unicode in tool name, "client 🚀" should become "client_"
|
||||
let tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
|
|
@ -1797,24 +1818,6 @@ mod tests {
|
|||
.await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
let tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
name: "client___test__tool".to_string().into(),
|
||||
arguments: Some(object!({})),
|
||||
};
|
||||
|
||||
let result = extension_manager
|
||||
.dispatch_tool_call(
|
||||
"test-session-id",
|
||||
tool_call,
|
||||
None,
|
||||
CancellationToken::default(),
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
// this should error out, specifically for an ToolError::ExecutionError
|
||||
let invalid_tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
|
|
@ -1829,20 +1832,14 @@ mod tests {
|
|||
None,
|
||||
CancellationToken::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
.result
|
||||
.await;
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(ErrorData {
|
||||
code: ErrorCode::INTERNAL_ERROR,
|
||||
..
|
||||
})
|
||||
));
|
||||
if let Err(err) = result {
|
||||
let tool_err = err.downcast_ref::<ErrorData>().expect("Expected ErrorData");
|
||||
assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND);
|
||||
} else {
|
||||
panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND");
|
||||
}
|
||||
|
||||
// this should error out, specifically with an ToolError::NotFound
|
||||
// this client doesn't exist
|
||||
let invalid_tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
|
|
@ -1945,7 +1942,6 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
// Try to call an unavailable tool
|
||||
let unavailable_tool_call = CallToolRequestParams {
|
||||
meta: None,
|
||||
task: None,
|
||||
|
|
@ -1962,11 +1958,9 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
// Should return RESOURCE_NOT_FOUND error
|
||||
if let Err(err) = result {
|
||||
let tool_err = err.downcast_ref::<ErrorData>().expect("Expected ErrorData");
|
||||
assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND);
|
||||
assert!(tool_err.message.contains("is not available"));
|
||||
} else {
|
||||
panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
mod agent;
|
||||
pub(crate) mod apps_extension;
|
||||
mod builtin_skills;
|
||||
pub(crate) mod builtin_skills;
|
||||
pub(crate) mod chatrecall_extension;
|
||||
pub(crate) mod code_execution_extension;
|
||||
pub mod container;
|
||||
|
|
@ -18,11 +18,10 @@ pub mod prompt_manager;
|
|||
mod reply_parts;
|
||||
pub mod retry;
|
||||
mod schedule_tool;
|
||||
pub(crate) mod skills_extension;
|
||||
pub mod subagent_execution_tool;
|
||||
pub mod subagent_handler;
|
||||
mod subagent_task_config;
|
||||
pub mod subagent_tool;
|
||||
pub(crate) mod subagent_handler;
|
||||
pub(crate) mod subagent_task_config;
|
||||
pub(crate) mod summon_extension;
|
||||
pub(crate) mod todo_extension;
|
||||
pub(crate) mod tom_extension;
|
||||
mod tool_execution;
|
||||
|
|
@ -34,5 +33,6 @@ pub use execute_commands::COMPACT_TRIGGERS;
|
|||
pub use extension::ExtensionConfig;
|
||||
pub use extension_manager::ExtensionManager;
|
||||
pub use prompt_manager::PromptManager;
|
||||
pub use subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE;
|
||||
pub use subagent_task_config::TaskConfig;
|
||||
pub use types::{FrontendTool, RetryConfig, SessionConfig, SuccessCheck};
|
||||
|
|
|
|||
|
|
@ -9,8 +9,6 @@ use tracing::debug;
|
|||
|
||||
use super::super::agents::Agent;
|
||||
use crate::agents::code_execution_extension::EXTENSION_NAME as CODE_EXECUTION_EXTENSION;
|
||||
use crate::agents::skills_extension::EXTENSION_NAME as SKILLS_EXTENSION;
|
||||
use crate::agents::subagent_tool::SUBAGENT_TOOL_NAME;
|
||||
use crate::conversation::message::{Message, MessageContent, ToolRequest};
|
||||
use crate::conversation::Conversation;
|
||||
use crate::providers::base::{stream_from_single_message, MessageStream, Provider, ProviderUsage};
|
||||
|
|
@ -151,12 +149,12 @@ impl Agent {
|
|||
.is_extension_enabled(CODE_EXECUTION_EXTENSION)
|
||||
.await;
|
||||
if code_execution_active {
|
||||
let code_exec_prefix = format!("{CODE_EXECUTION_EXTENSION}__");
|
||||
let skills_prefix = format!("{SKILLS_EXTENSION}__");
|
||||
tools.retain(|tool| {
|
||||
tool.name.starts_with(&code_exec_prefix)
|
||||
|| tool.name.starts_with(&skills_prefix)
|
||||
|| tool.name == SUBAGENT_TOOL_NAME
|
||||
if let Some(owner) = crate::agents::extension_manager::get_tool_owner(tool) {
|
||||
crate::agents::extension_manager::is_first_class_extension(&owner)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -182,7 +180,6 @@ impl Agent {
|
|||
.with_extension_and_tool_counts(extension_count, tool_count)
|
||||
.with_code_execution_mode(code_execution_active)
|
||||
.with_hints(working_dir)
|
||||
.with_enable_subagents(self.subagents_enabled(session_id).await)
|
||||
.build();
|
||||
|
||||
// Handle toolshim if enabled
|
||||
|
|
|
|||
|
|
@ -1,866 +0,0 @@
|
|||
use super::builtin_skills;
|
||||
use crate::agents::extension::PlatformExtensionContext;
|
||||
use crate::agents::mcp_client::{Error, McpClientTrait};
|
||||
use crate::config::paths::Paths;
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use indoc::indoc;
|
||||
use rmcp::model::{
|
||||
CallToolResult, Content, Implementation, InitializeResult, JsonObject, ListToolsResult,
|
||||
ProtocolVersion, ServerCapabilities, Tool, ToolAnnotations, ToolsCapability,
|
||||
};
|
||||
use schemars::{schema_for, JsonSchema};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
/// Name of the skills extension; reported as `server_info.name` in the client's
/// `InitializeResult`.
pub static EXTENSION_NAME: &str = "skills";
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
struct LoadSkillParams {
|
||||
name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct SkillMetadata {
|
||||
name: String,
|
||||
description: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct Skill {
|
||||
metadata: SkillMetadata,
|
||||
body: String,
|
||||
directory: PathBuf,
|
||||
supporting_files: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
pub struct SkillsClient {
|
||||
info: InitializeResult,
|
||||
skills: HashMap<String, Skill>,
|
||||
}
|
||||
|
||||
impl SkillsClient {
|
||||
pub fn new(_context: PlatformExtensionContext) -> Result<Self> {
|
||||
let info = InitializeResult {
|
||||
protocol_version: ProtocolVersion::V_2025_03_26,
|
||||
capabilities: ServerCapabilities {
|
||||
tasks: None,
|
||||
tools: Some(ToolsCapability {
|
||||
list_changed: Some(false),
|
||||
}),
|
||||
resources: None,
|
||||
prompts: None,
|
||||
completions: None,
|
||||
experimental: None,
|
||||
logging: None,
|
||||
},
|
||||
server_info: Implementation {
|
||||
name: EXTENSION_NAME.to_string(),
|
||||
title: Some("Skills".to_string()),
|
||||
version: "1.0.0".to_string(),
|
||||
icons: None,
|
||||
website_url: None,
|
||||
},
|
||||
instructions: Some(String::new()),
|
||||
};
|
||||
|
||||
let mut skills = Self::load_builtin_skills();
|
||||
|
||||
let directories = Self::get_default_skill_directories()
|
||||
.into_iter()
|
||||
.filter(|d| d.exists())
|
||||
.collect::<Vec<_>>();
|
||||
let fs_skills = Self::discover_skills_in_directories(&directories);
|
||||
skills.extend(fs_skills);
|
||||
|
||||
let mut client = Self { info, skills };
|
||||
client.info.instructions = Some(client.generate_instructions());
|
||||
Ok(client)
|
||||
}
|
||||
|
||||
fn load_builtin_skills() -> HashMap<String, Skill> {
|
||||
let mut skills = HashMap::new();
|
||||
for content in builtin_skills::get_all_builtin_skills() {
|
||||
if let Ok((metadata, body)) = Self::parse_frontmatter(content) {
|
||||
skills.insert(
|
||||
metadata.name.clone(),
|
||||
Skill {
|
||||
metadata,
|
||||
body,
|
||||
directory: PathBuf::new(),
|
||||
supporting_files: vec![],
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
skills
|
||||
}
|
||||
|
||||
fn get_default_skill_directories() -> Vec<PathBuf> {
|
||||
let mut dirs = Vec::new();
|
||||
|
||||
if let Some(home) = dirs::home_dir() {
|
||||
dirs.push(home.join(".claude/skills"));
|
||||
dirs.push(home.join(".config/agents/skills"));
|
||||
}
|
||||
|
||||
dirs.push(Paths::config_dir().join("skills"));
|
||||
|
||||
if let Ok(working_dir) = std::env::current_dir() {
|
||||
dirs.push(working_dir.join(".claude/skills"));
|
||||
dirs.push(working_dir.join(".goose/skills"));
|
||||
dirs.push(working_dir.join(".agents/skills"));
|
||||
}
|
||||
|
||||
dirs
|
||||
}
|
||||
|
||||
fn parse_skill_file(path: &Path) -> Result<Skill> {
|
||||
let content = std::fs::read_to_string(path)?;
|
||||
|
||||
let (metadata, body) = Self::parse_frontmatter(&content)?;
|
||||
|
||||
let directory = path
|
||||
.parent()
|
||||
.ok_or_else(|| anyhow::anyhow!("Skill file has no parent directory"))?
|
||||
.to_path_buf();
|
||||
|
||||
let supporting_files = Self::find_supporting_files(&directory, path)?;
|
||||
|
||||
Ok(Skill {
|
||||
metadata,
|
||||
body,
|
||||
directory,
|
||||
supporting_files,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_frontmatter(content: &str) -> Result<(SkillMetadata, String)> {
|
||||
let parts: Vec<&str> = content.split("---").collect();
|
||||
|
||||
if parts.len() < 3 {
|
||||
return Err(anyhow::anyhow!("Invalid frontmatter format"));
|
||||
}
|
||||
|
||||
let yaml_content = parts[1].trim();
|
||||
let metadata: SkillMetadata = serde_yaml::from_str(yaml_content)?;
|
||||
|
||||
let body = parts[2..].join("---").trim().to_string();
|
||||
|
||||
Ok((metadata, body))
|
||||
}
|
||||
|
||||
fn find_supporting_files(directory: &Path, skill_file: &Path) -> Result<Vec<PathBuf>> {
|
||||
let mut files = Vec::new();
|
||||
|
||||
if let Ok(entries) = std::fs::read_dir(directory) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_file() && path != skill_file {
|
||||
files.push(path);
|
||||
} else if path.is_dir() {
|
||||
if let Ok(sub_entries) = std::fs::read_dir(&path) {
|
||||
for sub_entry in sub_entries.flatten() {
|
||||
let sub_path = sub_entry.path();
|
||||
if sub_path.is_file() {
|
||||
files.push(sub_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
fn discover_skills_in_directories(directories: &[PathBuf]) -> HashMap<String, Skill> {
|
||||
let mut skills = HashMap::new();
|
||||
|
||||
for dir in directories {
|
||||
if let Ok(entries) = std::fs::read_dir(dir) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
let skill_file = path.join("SKILL.md");
|
||||
if skill_file.exists() {
|
||||
if let Ok(skill) = Self::parse_skill_file(&skill_file) {
|
||||
skills.insert(skill.metadata.name.clone(), skill);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
skills
|
||||
}
|
||||
|
||||
fn generate_instructions(&self) -> String {
|
||||
if self.skills.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let mut instructions = String::from("You have these skills at your disposal, when it is clear they can help you solve a problem or you are asked to use them:\n\n");
|
||||
|
||||
let mut skill_list: Vec<_> = self.skills.iter().collect();
|
||||
skill_list.sort_by_key(|(name, _)| *name);
|
||||
|
||||
for (name, skill) in skill_list {
|
||||
instructions.push_str(&format!("- {}: {}\n", name, skill.metadata.description));
|
||||
}
|
||||
|
||||
instructions
|
||||
}
|
||||
|
||||
async fn handle_load_skill(
|
||||
&self,
|
||||
arguments: Option<JsonObject>,
|
||||
) -> Result<Vec<Content>, String> {
|
||||
let skill_name = arguments
|
||||
.as_ref()
|
||||
.ok_or("Missing arguments")?
|
||||
.get("name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("Missing required parameter: name")?;
|
||||
|
||||
let skill = self
|
||||
.skills
|
||||
.get(skill_name)
|
||||
.ok_or_else(|| format!("Skill '{}' not found", skill_name))?;
|
||||
|
||||
let mut response = format!("# Skill: {}\n\n{}\n\n", skill.metadata.name, skill.body);
|
||||
|
||||
if !skill.supporting_files.is_empty() {
|
||||
response.push_str(&format!(
|
||||
"## Supporting Files\n\nSkill directory: {}\n\n",
|
||||
skill.directory.display()
|
||||
));
|
||||
response.push_str("The following supporting files are available:\n");
|
||||
for file in &skill.supporting_files {
|
||||
if let Ok(relative) = file.strip_prefix(&skill.directory) {
|
||||
response.push_str(&format!("- {}\n", relative.display()));
|
||||
}
|
||||
}
|
||||
response.push_str("\nUse the view file tools to access these files as needed, or run scripts as directed with dev extension.\n");
|
||||
}
|
||||
|
||||
Ok(vec![Content::text(response)])
|
||||
}
|
||||
|
||||
fn get_tools() -> Vec<Tool> {
|
||||
let schema = schema_for!(LoadSkillParams);
|
||||
let schema_value =
|
||||
serde_json::to_value(schema).expect("Failed to serialize LoadSkillParams schema");
|
||||
|
||||
let input_schema = schema_value
|
||||
.as_object()
|
||||
.expect("Schema should be an object")
|
||||
.clone();
|
||||
|
||||
vec![Tool::new(
|
||||
"loadSkill".to_string(),
|
||||
indoc! {r#"
|
||||
Load a skill by name and return its content.
|
||||
|
||||
This tool loads the specified skill and returns its body content along with
|
||||
information about any supporting files in the skill directory.
|
||||
"#}
|
||||
.to_string(),
|
||||
input_schema,
|
||||
)
|
||||
.annotate(ToolAnnotations {
|
||||
title: Some("Load skill".to_string()),
|
||||
read_only_hint: Some(true),
|
||||
destructive_hint: Some(false),
|
||||
idempotent_hint: Some(true),
|
||||
open_world_hint: Some(false),
|
||||
})]
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl McpClientTrait for SkillsClient {
|
||||
async fn list_tools(
|
||||
&self,
|
||||
_session_id: &str,
|
||||
_next_cursor: Option<String>,
|
||||
_cancellation_token: CancellationToken,
|
||||
) -> Result<ListToolsResult, Error> {
|
||||
let tools = if self.skills.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
Self::get_tools()
|
||||
};
|
||||
Ok(ListToolsResult {
|
||||
tools,
|
||||
next_cursor: None,
|
||||
meta: None,
|
||||
})
|
||||
}
|
||||
|
||||
async fn call_tool(
|
||||
&self,
|
||||
_session_id: &str,
|
||||
name: &str,
|
||||
arguments: Option<JsonObject>,
|
||||
_working_dir: Option<&str>,
|
||||
_cancellation_token: CancellationToken,
|
||||
) -> Result<CallToolResult, Error> {
|
||||
let content = match name {
|
||||
"loadSkill" => self.handle_load_skill(arguments).await,
|
||||
_ => Err(format!("Unknown tool: {}", name)),
|
||||
};
|
||||
|
||||
match content {
|
||||
Ok(content) => Ok(CallToolResult::success(content)),
|
||||
Err(error) => Ok(CallToolResult::error(vec![Content::text(format!(
|
||||
"Error: {}",
|
||||
error
|
||||
))])),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_info(&self) -> Option<&InitializeResult> {
|
||||
Some(&self.info)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates the directory `parent/name` and returns its path.
    fn make_dir(parent: &Path, name: &str) -> PathBuf {
        let dir = parent.join(name);
        fs::create_dir(&dir).unwrap();
        dir
    }

    /// Creates the skill directory `parent/name` containing a `SKILL.md` with the given
    /// frontmatter and body, and returns the skill directory.
    fn write_skill(parent: &Path, name: &str, description: &str, body: &str) -> PathBuf {
        let skill_dir = make_dir(parent, name);
        fs::write(
            skill_dir.join("SKILL.md"),
            format!("---\nname: {name}\ndescription: {description}\n---\n{body}\n"),
        )
        .unwrap();
        skill_dir
    }

    /// Builds a client around `skills` with empty instructions, bypassing `SkillsClient::new`
    /// so that nothing is read from the real skill directories.
    fn client_with_skills(skills: HashMap<String, Skill>) -> SkillsClient {
        SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        }
    }

    #[test]
    fn test_parse_frontmatter() {
        let content = "---\nname: test-skill\ndescription: A test skill\n---\n\n# Test Skill\n\nThis is the body of the skill.\n";

        let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap();
        assert_eq!(metadata.name, "test-skill");
        assert_eq!(metadata.description, "A test skill");
        assert!(body.contains("# Test Skill"));
        assert!(body.contains("This is the body of the skill."));
    }

    #[test]
    fn test_parse_frontmatter_missing() {
        let content = "# No frontmatter here";
        assert!(SkillsClient::parse_frontmatter(content).is_err());
    }

    #[test]
    fn test_parse_frontmatter_unclosed() {
        let content = "---\nname: test\ndescription: test\n";
        assert!(SkillsClient::parse_frontmatter(content).is_err());
    }

    #[test]
    fn test_parse_frontmatter_with_extra_fields() {
        let content = "---\nname: test-skill\ndescription: A test skill\nauthor: Test Author\nversion: 1.0.0\ntags:\n  - test\n  - example\nextra_field: some value\n---\n\n# Test Skill\n\nThis is the body of the skill.\n";

        let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap();
        assert_eq!(metadata.name, "test-skill");
        assert_eq!(metadata.description, "A test skill");
        assert!(body.contains("# Test Skill"));
        assert!(body.contains("This is the body of the skill."));
    }

    #[test]
    fn test_parse_skill_file() {
        let temp_dir = TempDir::new().unwrap();
        let skill_dir = write_skill(
            temp_dir.path(),
            "test-skill",
            "A test skill",
            "\n# Test Skill Content",
        );

        // One supporting file next to SKILL.md and one inside a sub-directory.
        fs::write(skill_dir.join("helper.py"), "print('hello')").unwrap();
        let templates = make_dir(&skill_dir, "templates");
        fs::write(templates.join("template.txt"), "template").unwrap();

        let skill = SkillsClient::parse_skill_file(&skill_dir.join("SKILL.md")).unwrap();
        assert_eq!(skill.metadata.name, "test-skill");
        assert_eq!(skill.metadata.description, "A test skill");
        assert!(skill.body.contains("# Test Skill Content"));
        assert_eq!(skill.supporting_files.len(), 2);
    }

    #[test]
    fn test_discover_skills() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = make_dir(temp_dir.path(), "skills");

        write_skill(&skills_dir, "test-skill-one-a1b2c3", "First test skill", "Body 1");
        write_skill(&skills_dir, "test-skill-two-d4e5f6", "Second test skill", "Body 2");
        write_skill(&skills_dir, "test-skill-three-g7h8i9", "Third test skill", "Body 3");

        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);

        assert_eq!(skills.len(), 3);
        assert!(skills.contains_key("test-skill-one-a1b2c3"));
        assert!(skills.contains_key("test-skill-two-d4e5f6"));
        assert!(skills.contains_key("test-skill-three-g7h8i9"));
    }

    #[test]
    fn test_discover_skills_from_multiple_directories() {
        let temp_dir = TempDir::new().unwrap();

        let dir1 = make_dir(temp_dir.path(), "dir1");
        write_skill(&dir1, "skill-from-dir1", "Skill from directory 1", "Content from dir1");

        let dir2 = make_dir(temp_dir.path(), "dir2");
        write_skill(&dir2, "skill-from-dir2", "Skill from directory 2", "Content from dir2");

        let dir3 = make_dir(temp_dir.path(), "dir3");
        write_skill(&dir3, "skill-from-dir3", "Skill from directory 3", "Content from dir3");

        let skills = SkillsClient::discover_skills_in_directories(&[dir1, dir2, dir3]);

        assert_eq!(skills.len(), 3);
        assert!(skills.contains_key("skill-from-dir1"));
        assert!(skills.contains_key("skill-from-dir2"));
        assert!(skills.contains_key("skill-from-dir3"));

        assert_eq!(
            skills.get("skill-from-dir1").unwrap().metadata.description,
            "Skill from directory 1"
        );
        assert_eq!(
            skills.get("skill-from-dir2").unwrap().metadata.description,
            "Skill from directory 2"
        );
        assert_eq!(
            skills.get("skill-from-dir3").unwrap().metadata.description,
            "Skill from directory 3"
        );
    }

    #[test]
    fn test_empty_instructions_when_no_skills() {
        let temp_dir = TempDir::new().unwrap();
        let empty_dir = make_dir(temp_dir.path(), "empty");

        let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]);
        assert_eq!(skills.len(), 0);

        let mut client = client_with_skills(skills);

        let instructions = client.generate_instructions();
        assert_eq!(instructions, "");
        assert!(instructions.is_empty());

        client.info.instructions = Some(instructions);
        assert_eq!(client.info.instructions.as_ref().unwrap(), "");
    }

    #[tokio::test]
    async fn test_no_tools_when_no_skills() {
        let temp_dir = TempDir::new().unwrap();
        let empty_dir = make_dir(temp_dir.path(), "empty");

        let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]);
        assert_eq!(skills.len(), 0);

        let client = client_with_skills(skills);

        let result = client
            .list_tools("test-session-id", None, CancellationToken::new())
            .await
            .unwrap();
        assert_eq!(result.tools.len(), 0);
    }

    #[tokio::test]
    async fn test_tools_available_when_skills_exist() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = make_dir(temp_dir.path(), "skills");
        write_skill(&skills_dir, "test-skill", "A test skill", "Content");

        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 1);

        let client = client_with_skills(skills);

        let result = client
            .list_tools("test-session-id", None, CancellationToken::new())
            .await
            .unwrap();
        assert_eq!(result.tools.len(), 1);
        assert_eq!(result.tools[0].name, "loadSkill");
    }

    #[test]
    fn test_instructions_with_skills() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = make_dir(temp_dir.path(), "skills");
        write_skill(&skills_dir, "alpha-skill", "First skill alphabetically", "Content");
        write_skill(&skills_dir, "beta-skill", "Second skill alphabetically", "Content");

        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 2);

        let mut client = client_with_skills(skills);

        let instructions = client.generate_instructions();
        assert!(!instructions.is_empty());
        assert!(instructions.contains("You have these skills at your disposal"));
        assert!(instructions.contains("alpha-skill: First skill alphabetically"));
        assert!(instructions.contains("beta-skill: Second skill alphabetically"));

        // Skills must be listed in alphabetical order.
        let lines: Vec<&str> = instructions.lines().collect();
        let alpha_line = lines
            .iter()
            .position(|l| l.contains("alpha-skill"))
            .unwrap();
        let beta_line = lines.iter().position(|l| l.contains("beta-skill")).unwrap();
        assert!(alpha_line < beta_line);

        client.info.instructions = Some(instructions);
        assert!(!client.info.instructions.as_ref().unwrap().is_empty());
    }

    #[test]
    fn test_discover_skills_working_dir_overrides_global() {
        let temp_dir = TempDir::new().unwrap();

        // Stand-ins for the real search locations, lowest priority first:
        // ~/.claude/skills, ~/.config/goose/skills, $PWD/.claude/skills, $PWD/.goose/skills.
        let layers = [
            ("global-claude", "From global claude", "Global claude content"),
            (
                "global-goose",
                "From global goose config",
                "Global goose config content",
            ),
            (
                "working-claude",
                "From working dir claude",
                "Working dir claude content",
            ),
            (
                "working-goose",
                "From working dir goose",
                "Working dir goose content",
            ),
        ];

        let directories: Vec<PathBuf> = layers
            .iter()
            .map(|(dir_name, description, body)| {
                let root = make_dir(temp_dir.path(), dir_name);
                write_skill(&root, "my-skill", description, body);
                root
            })
            .collect();

        let skills = SkillsClient::discover_skills_in_directories(&directories);

        assert_eq!(skills.len(), 1);
        assert!(skills.contains_key("my-skill"));
        // The last directory (working-goose) should win.
        assert_eq!(
            skills.get("my-skill").unwrap().metadata.description,
            "From working dir goose"
        );
        assert!(skills
            .get("my-skill")
            .unwrap()
            .body
            .contains("Working dir goose content"));
    }

    #[test]
    fn test_builtin_skills_loaded() {
        let skills = SkillsClient::load_builtin_skills();

        assert!(!skills.is_empty());
        assert!(skills.contains_key("goose-doc-guide"));
    }
}
|
||||
|
|
@ -20,6 +20,8 @@ use std::sync::Arc;
|
|||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info};
|
||||
|
||||
/// Shared, thread-safe callback that receives a borrowed [`Message`].
/// `get_agent_messages_with_callback` invokes it for every `AgentEvent::Message` it receives.
pub type OnMessageCallback = Arc<dyn Fn(&Message) + Send + Sync>;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct SubagentPromptContext {
|
||||
pub max_turns: usize,
|
||||
|
|
@ -52,6 +54,39 @@ pub async fn run_complete_subagent_task(
|
|||
.await
|
||||
}
|
||||
|
||||
pub async fn run_subagent_task_with_callback(
|
||||
config: AgentConfig,
|
||||
recipe: Recipe,
|
||||
task_config: TaskConfig,
|
||||
return_last_only: bool,
|
||||
session_id: String,
|
||||
cancellation_token: Option<CancellationToken>,
|
||||
on_message: Option<OnMessageCallback>,
|
||||
) -> Result<String, anyhow::Error> {
|
||||
let (messages, final_output) = get_agent_messages_with_callback(
|
||||
config,
|
||||
recipe,
|
||||
task_config,
|
||||
session_id,
|
||||
cancellation_token,
|
||||
on_message,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ErrorData::new(
|
||||
ErrorCode::INTERNAL_ERROR,
|
||||
format!("Failed to execute task: {}", e),
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
|
||||
if let Some(output) = final_output {
|
||||
return Ok(output);
|
||||
}
|
||||
|
||||
Ok(extract_response_text(&messages, return_last_only))
|
||||
}
|
||||
|
||||
pub async fn run_complete_subagent_task_with_notifications(
|
||||
config: AgentConfig,
|
||||
recipe: Recipe,
|
||||
|
|
@ -82,7 +117,11 @@ pub async fn run_complete_subagent_task_with_notifications(
|
|||
return Ok(output);
|
||||
}
|
||||
|
||||
let response_text = if return_last_only {
|
||||
Ok(extract_response_text(&messages, return_last_only))
|
||||
}
|
||||
|
||||
fn extract_response_text(messages: &Conversation, return_last_only: bool) -> String {
|
||||
if return_last_only {
|
||||
messages
|
||||
.messages()
|
||||
.last()
|
||||
|
|
@ -133,13 +172,101 @@ pub async fn run_complete_subagent_task_with_notifications(
|
|||
.collect();
|
||||
|
||||
all_text_content.join("\n")
|
||||
};
|
||||
|
||||
Ok(response_text)
|
||||
}
|
||||
}
|
||||
|
||||
/// String tag `"subagent_tool_request"`.
/// NOTE(review): its consumers are not visible in this part of the file — presumably the type
/// label attached to notifications about subagent tool requests; confirm against callers.
pub const SUBAGENT_TOOL_REQUEST_TYPE: &str = "subagent_tool_request";
|
||||
|
||||
fn get_agent_messages_with_callback(
|
||||
config: AgentConfig,
|
||||
recipe: Recipe,
|
||||
task_config: TaskConfig,
|
||||
session_id: String,
|
||||
cancellation_token: Option<CancellationToken>,
|
||||
on_message: Option<OnMessageCallback>,
|
||||
) -> AgentMessagesFuture {
|
||||
Box::pin(async move {
|
||||
let system_instructions = recipe.instructions.clone().unwrap_or_default();
|
||||
let user_task = recipe
|
||||
.prompt
|
||||
.clone()
|
||||
.unwrap_or_else(|| "Begin.".to_string());
|
||||
|
||||
let agent = Arc::new(Agent::with_config(config));
|
||||
|
||||
agent
|
||||
.update_provider(task_config.provider.clone(), &session_id)
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to set provider on sub agent: {}", e))?;
|
||||
|
||||
for extension in &task_config.extensions {
|
||||
if let Err(e) = agent.add_extension(extension.clone(), &session_id).await {
|
||||
debug!(
|
||||
"Failed to add extension '{}' to subagent: {}",
|
||||
extension.name(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let has_response_schema = recipe.response.is_some();
|
||||
agent
|
||||
.apply_recipe_components(recipe.response.clone(), true)
|
||||
.await;
|
||||
|
||||
let subagent_prompt =
|
||||
build_subagent_prompt(&agent, &task_config, &session_id, system_instructions).await?;
|
||||
agent.override_system_prompt(subagent_prompt).await;
|
||||
|
||||
let user_message = Message::user().with_text(user_task);
|
||||
let mut conversation = Conversation::new_unvalidated(vec![user_message.clone()]);
|
||||
|
||||
if let Some(activities) = recipe.activities {
|
||||
for activity in activities {
|
||||
info!("Recipe activity: {}", activity);
|
||||
}
|
||||
}
|
||||
let session_config = SessionConfig {
|
||||
id: session_id.clone(),
|
||||
schedule_id: None,
|
||||
max_turns: task_config.max_turns.map(|v| v as u32),
|
||||
retry_config: recipe.retry,
|
||||
};
|
||||
|
||||
let mut stream =
|
||||
crate::session_context::with_session_id(Some(session_id.to_string()), async {
|
||||
agent
|
||||
.reply(user_message, session_config, cancellation_token)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get reply from agent: {}", e))?;
|
||||
|
||||
while let Some(message_result) = stream.next().await {
|
||||
match message_result {
|
||||
Ok(AgentEvent::Message(msg)) => {
|
||||
if let Some(ref callback) = on_message {
|
||||
callback(&msg);
|
||||
}
|
||||
conversation.push(msg);
|
||||
}
|
||||
Ok(AgentEvent::McpNotification(_)) | Ok(AgentEvent::ModelChange { .. }) => {}
|
||||
Ok(AgentEvent::HistoryReplaced(updated_conversation)) => {
|
||||
conversation = updated_conversation;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Error receiving message from subagent: {}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let final_output = get_final_output(&agent, has_response_schema).await;
|
||||
|
||||
Ok((conversation, final_output))
|
||||
})
|
||||
}
|
||||
|
||||
fn get_agent_messages_with_notifications(
|
||||
config: AgentConfig,
|
||||
recipe: Recipe,
|
||||
|
|
@ -174,7 +301,7 @@ fn get_agent_messages_with_notifications(
|
|||
|
||||
let has_response_schema = recipe.response.is_some();
|
||||
agent
|
||||
.apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true)
|
||||
.apply_recipe_components(recipe.response.clone(), true)
|
||||
.await;
|
||||
|
||||
let subagent_prompt =
|
||||
|
|
|
|||
|
|
@ -1,530 +0,0 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use futures::FutureExt;
|
||||
use rmcp::model::{Content, ErrorCode, ErrorData, ServerNotification, Tool};
|
||||
use serde::Deserialize;
|
||||
use serde_json::{json, Value};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::agents::subagent_handler::run_complete_subagent_task_with_notifications;
|
||||
use crate::agents::subagent_task_config::TaskConfig;
|
||||
use crate::agents::tool_execution::ToolCallResult;
|
||||
use crate::agents::AgentConfig;
|
||||
use crate::providers;
|
||||
use crate::recipe::build_recipe::build_recipe_from_template;
|
||||
use crate::recipe::local_recipes::load_local_recipe_file;
|
||||
use crate::recipe::{Recipe, SubRecipe};
|
||||
|
||||
/// Name under which the subagent tool is registered; `create_subagent_tool` passes it to `Tool::new`.
pub const SUBAGENT_TOOL_NAME: &str = "subagent";
|
||||
|
||||
const SUMMARY_INSTRUCTIONS: &str = r#"
|
||||
Important: Your parent agent will only receive your final message as a summary of your work.
|
||||
Make sure your last message provides a comprehensive summary of:
|
||||
- What you were asked to do
|
||||
- What actions you took
|
||||
- The results or outcomes
|
||||
- Any important findings or recommendations
|
||||
|
||||
Be concise but complete.
|
||||
"#;
|
||||
|
||||
/// Arguments of a `subagent` tool call, deserialized from the tool's JSON input.
#[derive(Debug, Deserialize, Clone)]
pub struct SubagentParams {
    /// Instructions for the subagent. Required for ad-hoc tasks; with a subrecipe they are
    /// appended to the subrecipe's own instructions.
    pub instructions: Option<String>,
    /// Name of a predefined subrecipe to run.
    pub subrecipe: Option<String>,
    /// Parameter values for the subrecipe; rejected when `subrecipe` is absent.
    pub parameters: Option<HashMap<String, Value>>,
    /// Names of extensions to keep. `None` leaves the inherited set untouched; an empty list
    /// removes all extensions.
    pub extensions: Option<Vec<String>>,
    /// Optional provider/model/temperature/max-turns overrides.
    pub settings: Option<SubagentSettings>,
    /// Whether only the subagent's final summary is returned. Defaults to `true` when omitted.
    #[serde(default = "default_summary")]
    pub summary: bool,
}
|
||||
|
||||
/// Serde default for `SubagentParams::summary`: summaries are on unless explicitly disabled.
fn default_summary() -> bool {
    true
}
|
||||
|
||||
/// Optional per-call overrides for the subagent's model setup; every field left as `None`
/// keeps the inherited value.
#[derive(Debug, Deserialize, Clone)]
pub struct SubagentSettings {
    /// Override LLM provider name.
    pub provider: Option<String>,
    /// Override model name.
    pub model: Option<String>,
    /// Override sampling temperature.
    pub temperature: Option<f32>,
    /// Override the maximum number of turns.
    pub max_turns: Option<usize>,
}
|
||||
|
||||
pub fn create_subagent_tool(sub_recipes: &[SubRecipe]) -> Tool {
|
||||
let description = build_tool_description(sub_recipes);
|
||||
|
||||
let schema = json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the subagent. Required for ad-hoc tasks. For predefined tasks, adds additional context."
|
||||
},
|
||||
"subrecipe": {
|
||||
"type": "string",
|
||||
"description": "Name of a predefined subrecipe to run."
|
||||
},
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"description": "Parameters for the subrecipe. Only valid when 'subrecipe' is specified."
|
||||
},
|
||||
"extensions": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": "Extensions to enable. Omit to inherit all, empty array for none."
|
||||
},
|
||||
"settings": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider": {"type": "string", "description": "Override LLM provider"},
|
||||
"model": {"type": "string", "description": "Override model"},
|
||||
"temperature": {"type": "number", "description": "Override temperature"},
|
||||
"max_turns": {"type": "number", "description": "Override max turns"}
|
||||
},
|
||||
"description": "Override model/provider/settings."
|
||||
},
|
||||
"summary": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "If true (default), return only the subagent's final summary."
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Tool::new(
|
||||
SUBAGENT_TOOL_NAME,
|
||||
description,
|
||||
schema.as_object().unwrap().clone(),
|
||||
)
|
||||
}
|
||||
|
||||
fn build_tool_description(sub_recipes: &[SubRecipe]) -> String {
|
||||
let mut desc = String::from(
|
||||
"Delegate a task to a subagent that runs independently with its own context.\n\n\
|
||||
Modes:\n\
|
||||
1. Ad-hoc: Provide `instructions` for a custom task\n\
|
||||
2. Predefined: Provide `subrecipe` name to run a predefined task\n\
|
||||
3. Augmented: Provide both `subrecipe` and `instructions` to add context\n\n\
|
||||
The subagent has access to the same tools as you by default. \
|
||||
Use `extensions` to limit which extensions the subagent can use.\n\n\
|
||||
For parallel execution, make multiple `subagent` tool calls in the same message.",
|
||||
);
|
||||
|
||||
if !sub_recipes.is_empty() {
|
||||
desc.push_str("\n\nAvailable subrecipes:");
|
||||
for sr in sub_recipes {
|
||||
let params_info = get_subrecipe_params_description(sr);
|
||||
let sequential_hint = if sr.sequential_when_repeated {
|
||||
" [run sequentially, not in parallel]"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
desc.push_str(&format!(
|
||||
"\n• {}{} - {}{}",
|
||||
sr.name,
|
||||
sequential_hint,
|
||||
sr.description.as_deref().unwrap_or("No description"),
|
||||
if params_info.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" (params: {})", params_info)
|
||||
}
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
desc
|
||||
}
|
||||
|
||||
fn get_subrecipe_params_description(sub_recipe: &SubRecipe) -> String {
|
||||
match load_local_recipe_file(&sub_recipe.path) {
|
||||
Ok(recipe_file) => match Recipe::from_content(&recipe_file.content) {
|
||||
Ok(recipe) => {
|
||||
if let Some(params) = recipe.parameters {
|
||||
params
|
||||
.iter()
|
||||
.filter(|p| {
|
||||
sub_recipe
|
||||
.values
|
||||
.as_ref()
|
||||
.map(|v| !v.contains_key(&p.key))
|
||||
.unwrap_or(true)
|
||||
})
|
||||
.map(|p| {
|
||||
let req = match p.requirement {
|
||||
crate::recipe::RecipeParameterRequirement::Required => "[required]",
|
||||
_ => "[optional]",
|
||||
};
|
||||
format!("{} {}", p.key, req)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
Err(_) => String::new(),
|
||||
},
|
||||
Err(_) => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: SubRecipe.sequential_when_repeated is surfaced as a hint in the tool description
|
||||
/// (e.g., "[run sequentially, not in parallel]") but not enforced. The LLM controls
|
||||
/// sequencing by making sequential vs parallel tool calls.
|
||||
pub fn handle_subagent_tool(
|
||||
config: &AgentConfig,
|
||||
params: Value,
|
||||
task_config: TaskConfig,
|
||||
sub_recipes: HashMap<String, SubRecipe>,
|
||||
working_dir: PathBuf,
|
||||
cancellation_token: Option<CancellationToken>,
|
||||
) -> ToolCallResult {
|
||||
let parsed_params: SubagentParams = match serde_json::from_value(params) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
return ToolCallResult::from(Err(ErrorData {
|
||||
code: ErrorCode::INVALID_PARAMS,
|
||||
message: Cow::from(format!("Invalid parameters: {}", e)),
|
||||
data: None,
|
||||
}));
|
||||
}
|
||||
};
|
||||
|
||||
if parsed_params.instructions.is_none() && parsed_params.subrecipe.is_none() {
|
||||
return ToolCallResult::from(Err(ErrorData {
|
||||
code: ErrorCode::INVALID_PARAMS,
|
||||
message: Cow::from("Must provide 'instructions' or 'subrecipe' (or both)"),
|
||||
data: None,
|
||||
}));
|
||||
}
|
||||
|
||||
if parsed_params.parameters.is_some() && parsed_params.subrecipe.is_none() {
|
||||
return ToolCallResult::from(Err(ErrorData {
|
||||
code: ErrorCode::INVALID_PARAMS,
|
||||
message: Cow::from("'parameters' can only be used with 'subrecipe'"),
|
||||
data: None,
|
||||
}));
|
||||
}
|
||||
|
||||
let recipe = match build_recipe(&parsed_params, &sub_recipes) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return ToolCallResult::from(Err(ErrorData {
|
||||
code: ErrorCode::INVALID_PARAMS,
|
||||
message: Cow::from(e.to_string()),
|
||||
data: None,
|
||||
}));
|
||||
}
|
||||
};
|
||||
|
||||
let config = config.clone();
|
||||
let (notification_tx, notification_rx) = mpsc::unbounded_channel();
|
||||
|
||||
ToolCallResult {
|
||||
notification_stream: Some(Box::new(UnboundedReceiverStream::new(notification_rx))),
|
||||
result: Box::new(
|
||||
execute_subagent_with_notifications(
|
||||
config,
|
||||
recipe,
|
||||
task_config,
|
||||
parsed_params,
|
||||
working_dir,
|
||||
cancellation_token,
|
||||
notification_tx,
|
||||
)
|
||||
.boxed(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_subagent_with_notifications(
|
||||
config: AgentConfig,
|
||||
recipe: Recipe,
|
||||
task_config: TaskConfig,
|
||||
params: SubagentParams,
|
||||
working_dir: PathBuf,
|
||||
cancellation_token: Option<CancellationToken>,
|
||||
notification_tx: mpsc::UnboundedSender<ServerNotification>,
|
||||
) -> Result<rmcp::model::CallToolResult, ErrorData> {
|
||||
let session = config
|
||||
.session_manager
|
||||
.create_session(
|
||||
working_dir,
|
||||
"Subagent task".to_string(),
|
||||
crate::session::session_manager::SessionType::SubAgent,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| ErrorData {
|
||||
code: ErrorCode::INTERNAL_ERROR,
|
||||
message: Cow::from(format!("Failed to create session: {}", e)),
|
||||
data: None,
|
||||
})?;
|
||||
|
||||
let task_config = apply_settings_overrides(task_config, ¶ms)
|
||||
.await
|
||||
.map_err(|e| ErrorData {
|
||||
code: ErrorCode::INVALID_PARAMS,
|
||||
message: Cow::from(e.to_string()),
|
||||
data: None,
|
||||
})?;
|
||||
|
||||
let result = run_complete_subagent_task_with_notifications(
|
||||
config,
|
||||
recipe,
|
||||
task_config,
|
||||
params.summary,
|
||||
session.id,
|
||||
cancellation_token,
|
||||
Some(notification_tx),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(text) => Ok(rmcp::model::CallToolResult {
|
||||
content: vec![Content::text(text)],
|
||||
structured_content: None,
|
||||
is_error: Some(false),
|
||||
meta: None,
|
||||
}),
|
||||
Err(e) => Err(ErrorData {
|
||||
code: ErrorCode::INTERNAL_ERROR,
|
||||
message: Cow::from(e.to_string()),
|
||||
data: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_recipe(
|
||||
params: &SubagentParams,
|
||||
sub_recipes: &HashMap<String, SubRecipe>,
|
||||
) -> Result<Recipe> {
|
||||
let mut recipe = if let Some(subrecipe_name) = ¶ms.subrecipe {
|
||||
build_subrecipe(subrecipe_name, params, sub_recipes)?
|
||||
} else {
|
||||
build_adhoc_recipe(params)?
|
||||
};
|
||||
|
||||
if params.summary {
|
||||
let current = recipe.instructions.unwrap_or_default();
|
||||
recipe.instructions = Some(format!("{}\n{}", current, SUMMARY_INSTRUCTIONS));
|
||||
}
|
||||
|
||||
Ok(recipe)
|
||||
}
|
||||
|
||||
fn build_subrecipe(
|
||||
subrecipe_name: &str,
|
||||
params: &SubagentParams,
|
||||
sub_recipes: &HashMap<String, SubRecipe>,
|
||||
) -> Result<Recipe> {
|
||||
let sub_recipe = sub_recipes.get(subrecipe_name).ok_or_else(|| {
|
||||
let available: Vec<_> = sub_recipes.keys().cloned().collect();
|
||||
anyhow!(
|
||||
"Unknown subrecipe '{}'. Available: {}",
|
||||
subrecipe_name,
|
||||
available.join(", ")
|
||||
)
|
||||
})?;
|
||||
|
||||
let recipe_file = load_local_recipe_file(&sub_recipe.path)
|
||||
.map_err(|e| anyhow!("Failed to load subrecipe '{}': {}", subrecipe_name, e))?;
|
||||
|
||||
let mut param_values: Vec<(String, String)> = Vec::new();
|
||||
|
||||
if let Some(values) = &sub_recipe.values {
|
||||
for (k, v) in values {
|
||||
param_values.push((k.clone(), v.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(provided_params) = ¶ms.parameters {
|
||||
for (k, v) in provided_params {
|
||||
let value_str = match v {
|
||||
Value::String(s) => s.clone(),
|
||||
other => other.to_string(),
|
||||
};
|
||||
param_values.push((k.clone(), value_str));
|
||||
}
|
||||
}
|
||||
|
||||
let mut recipe = build_recipe_from_template(
|
||||
recipe_file.content,
|
||||
&recipe_file.parent_dir,
|
||||
param_values,
|
||||
None::<fn(&str, &str) -> Result<String, anyhow::Error>>,
|
||||
)
|
||||
.map_err(|e| anyhow!("Failed to build subrecipe: {}", e))?;
|
||||
|
||||
if let Some(extra) = ¶ms.instructions {
|
||||
let mut current = recipe.instructions.take().unwrap_or_default();
|
||||
if !current.is_empty() {
|
||||
current.push_str("\n\n");
|
||||
}
|
||||
current.push_str(extra);
|
||||
recipe.instructions = Some(current);
|
||||
}
|
||||
|
||||
Ok(recipe)
|
||||
}
|
||||
|
||||
fn build_adhoc_recipe(params: &SubagentParams) -> Result<Recipe> {
|
||||
let instructions = params
|
||||
.instructions
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("Instructions required for ad-hoc task"))?;
|
||||
|
||||
let recipe = Recipe::builder()
|
||||
.version("1.0.0")
|
||||
.title("Subagent Task")
|
||||
.description("Ad-hoc subagent task")
|
||||
.instructions(instructions)
|
||||
.build()
|
||||
.map_err(|e| anyhow!("Failed to build recipe: {}", e))?;
|
||||
|
||||
if recipe.check_for_security_warnings() {
|
||||
return Err(anyhow!("Recipe contains potentially harmful content"));
|
||||
}
|
||||
|
||||
Ok(recipe)
|
||||
}
|
||||
|
||||
async fn apply_settings_overrides(
|
||||
mut task_config: TaskConfig,
|
||||
params: &SubagentParams,
|
||||
) -> Result<TaskConfig> {
|
||||
if let Some(settings) = ¶ms.settings {
|
||||
if let Some(max_turns) = settings.max_turns {
|
||||
task_config.max_turns = Some(max_turns);
|
||||
}
|
||||
|
||||
if settings.provider.is_some() || settings.model.is_some() || settings.temperature.is_some()
|
||||
{
|
||||
let provider_name = settings
|
||||
.provider
|
||||
.clone()
|
||||
.unwrap_or_else(|| task_config.provider.get_name().to_string());
|
||||
|
||||
let mut model_config = task_config.provider.get_model_config();
|
||||
|
||||
if let Some(model) = &settings.model {
|
||||
model_config.model_name = model.clone();
|
||||
}
|
||||
|
||||
if let Some(temp) = settings.temperature {
|
||||
model_config = model_config.with_temperature(Some(temp));
|
||||
}
|
||||
|
||||
task_config.provider = providers::create(&provider_name, model_config)
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to create provider '{}': {}", provider_name, e))?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(extension_names) = ¶ms.extensions {
|
||||
if extension_names.is_empty() {
|
||||
task_config.extensions = Vec::new();
|
||||
} else {
|
||||
task_config
|
||||
.extensions
|
||||
.retain(|ext| extension_names.contains(&ext.name()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(task_config)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `SubRecipe` pointing at `test.yaml` with no preset values.
    fn sub_recipe(name: &str, description: &str, sequential: bool) -> SubRecipe {
        SubRecipe {
            name: name.to_string(),
            path: "test.yaml".to_string(),
            values: None,
            sequential_when_repeated: sequential,
            description: Some(description.to_string()),
        }
    }

    #[test]
    fn test_tool_name() {
        assert_eq!(SUBAGENT_TOOL_NAME, "subagent");
    }

    /// Without subrecipes the description covers ad-hoc use and has no subrecipe list.
    #[test]
    fn test_create_tool_without_subrecipes() {
        let tool = create_subagent_tool(&[]);
        assert_eq!(tool.name, "subagent");

        let description = tool.description.as_ref().unwrap();
        assert!(description.contains("Ad-hoc"));
        assert!(!description.contains("Available subrecipes"));
    }

    /// Registered subrecipes are listed by name under "Available subrecipes".
    #[test]
    fn test_create_tool_with_subrecipes() {
        let registered = vec![sub_recipe("test_recipe", "A test recipe", false)];

        let tool = create_subagent_tool(&registered);
        let description = tool.description.as_ref().unwrap();

        assert!(description.contains("Available subrecipes"));
        assert!(description.contains("test_recipe"));
    }

    /// Only subrecipes flagged `sequential_when_repeated` carry the sequential hint.
    #[test]
    fn test_sequential_hint_in_description() {
        let registered = vec![
            sub_recipe("parallel_ok", "Can run in parallel", false),
            sub_recipe("sequential_only", "Must run sequentially", true),
        ];

        let tool = create_subagent_tool(&registered);
        let desc = tool.description.as_ref().unwrap();

        assert!(desc.contains("parallel_ok"));
        assert!(!desc.contains("parallel_ok [run sequentially"));

        assert!(desc.contains("sequential_only [run sequentially, not in parallel]"));
    }

    /// Every field of `SubagentParams` is picked up from a fully populated JSON object.
    #[test]
    fn test_params_deserialization_full() {
        let raw = json!({
            "instructions": "Extra context",
            "subrecipe": "my_recipe",
            "parameters": {"key": "value"},
            "extensions": ["developer"],
            "settings": {"model": "gpt-4"},
            "summary": false
        });
        let params: SubagentParams = serde_json::from_value(raw).unwrap();

        assert_eq!(params.instructions.as_deref(), Some("Extra context"));
        assert_eq!(params.subrecipe.as_deref(), Some("my_recipe"));
        assert!(params.parameters.is_some());
        assert_eq!(params.extensions, Some(vec!["developer".to_string()]));
        assert!(!params.summary);
    }
}
|
||||
1980
crates/goose/src/agents/summon_extension.rs
Normal file
1980
crates/goose/src/agents/summon_extension.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -72,9 +72,29 @@ impl TestProvider {
|
|||
}
|
||||
|
||||
fn hash_input(messages: &[Message]) -> String {
|
||||
use crate::conversation::message::MessageContent;
|
||||
|
||||
// Strip internal metadata (e.g. tool_meta/_meta) from content before hashing.
|
||||
// This metadata is used for internal routing (like goose_extension ownership)
|
||||
// and isn't part of the semantic input the LLM sees, so it shouldn't affect
|
||||
// replay matching.
|
||||
let stable_messages: Vec<_> = messages
|
||||
.iter()
|
||||
.map(|msg| (msg.role.clone(), msg.content.clone()))
|
||||
.map(|msg| {
|
||||
let cleaned_content: Vec<_> = msg
|
||||
.content
|
||||
.iter()
|
||||
.map(|c| match c {
|
||||
MessageContent::ToolRequest(req) => {
|
||||
let mut req = req.clone();
|
||||
req.tool_meta = None;
|
||||
MessageContent::ToolRequest(req)
|
||||
}
|
||||
other => other.clone(),
|
||||
})
|
||||
.collect();
|
||||
(msg.role.clone(), cleaned_content)
|
||||
})
|
||||
.collect();
|
||||
let serialized = serde_json::to_string(&stable_messages).unwrap_or_default();
|
||||
let mut hasher = Sha256::new();
|
||||
|
|
|
|||
|
|
@ -226,6 +226,24 @@ pub struct RecipeBuilder {
|
|||
}
|
||||
|
||||
impl Recipe {
|
||||
fn ensure_summon_for_subrecipes(&mut self) {
|
||||
if self.sub_recipes.is_none() {
|
||||
return;
|
||||
}
|
||||
let summon = ExtensionConfig::Platform {
|
||||
name: "summon".to_string(),
|
||||
description: String::new(),
|
||||
display_name: None,
|
||||
bundled: None,
|
||||
available_tools: vec![],
|
||||
};
|
||||
match &mut self.extensions {
|
||||
Some(exts) if !exts.iter().any(|e| e.name() == "summon") => exts.push(summon),
|
||||
None => self.extensions = Some(vec![summon]),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if harmful content is detected in instructions, prompt, or activities fields
|
||||
pub fn check_for_security_warnings(&self) -> bool {
|
||||
if [self.instructions.as_deref(), self.prompt.as_deref()]
|
||||
|
|
@ -277,7 +295,7 @@ impl Recipe {
|
|||
}
|
||||
|
||||
pub fn from_content(content: &str) -> Result<Self> {
|
||||
let recipe: Recipe = match serde_yaml::from_str::<serde_yaml::Value>(content) {
|
||||
let mut recipe: Recipe = match serde_yaml::from_str::<serde_yaml::Value>(content) {
|
||||
Ok(yaml_value) => {
|
||||
if let Some(nested_recipe) = yaml_value.get("recipe") {
|
||||
serde_yaml::from_value(nested_recipe.clone())
|
||||
|
|
@ -291,6 +309,7 @@ impl Recipe {
|
|||
.map_err(|e| anyhow::anyhow!("{}", strip_error_location(&e.to_string())))?,
|
||||
};
|
||||
|
||||
recipe.ensure_summon_for_subrecipes();
|
||||
Ok(recipe)
|
||||
}
|
||||
}
|
||||
|
|
@ -450,8 +469,10 @@ mod tests {
|
|||
assert_eq!(recipe.prompt, Some("Test prompt".to_string()));
|
||||
|
||||
assert!(recipe.extensions.is_some());
|
||||
let extensions = recipe.extensions.unwrap();
|
||||
assert_eq!(extensions.len(), 1);
|
||||
let extensions = recipe.extensions.as_ref().unwrap();
|
||||
assert_eq!(extensions.len(), 2);
|
||||
assert!(extensions.iter().any(|e| e.name() == "test_extension"));
|
||||
assert!(extensions.iter().any(|e| e.name() == "summon"));
|
||||
|
||||
assert!(recipe.parameters.is_some());
|
||||
let parameters = recipe.parameters.unwrap();
|
||||
|
|
@ -533,8 +554,10 @@ sub_recipes:
|
|||
assert_eq!(recipe.prompt, Some("Test prompt".to_string()));
|
||||
|
||||
assert!(recipe.extensions.is_some());
|
||||
let extensions = recipe.extensions.unwrap();
|
||||
assert_eq!(extensions.len(), 1);
|
||||
let extensions = recipe.extensions.as_ref().unwrap();
|
||||
assert_eq!(extensions.len(), 2);
|
||||
assert!(extensions.iter().any(|e| e.name() == "test_extension"));
|
||||
assert!(extensions.iter().any(|e| e.name() == "summon"));
|
||||
|
||||
assert!(recipe.parameters.is_some());
|
||||
let parameters = recipe.parameters.unwrap();
|
||||
|
|
|
|||
|
|
@ -491,7 +491,7 @@ impl SessionStorage {
|
|||
let options = SqliteConnectOptions::new()
|
||||
.filename(path)
|
||||
.create_if_missing(true)
|
||||
.busy_timeout(std::time::Duration::from_secs(5))
|
||||
.busy_timeout(std::time::Duration::from_secs(30))
|
||||
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal);
|
||||
|
||||
SqlitePoolOptions::new().connect_lazy_with(options)
|
||||
|
|
|
|||
|
|
@ -1,104 +0,0 @@
|
|||
use goose::agents::subagent_tool::{create_subagent_tool, SUBAGENT_TOOL_NAME};
|
||||
use goose::recipe::{Recipe, SubRecipe};
|
||||
use std::collections::HashMap;
|
||||
use tempfile::TempDir;
|
||||
|
||||
const RECIPE_TWO_PARAMS: &str = r#"
|
||||
version: "1.0.0"
|
||||
title: "Test Task"
|
||||
description: "A test task"
|
||||
instructions: "Process {{ first }} and {{ second }}"
|
||||
parameters:
|
||||
- key: first
|
||||
input_type: string
|
||||
requirement: required
|
||||
description: "First param"
|
||||
- key: second
|
||||
input_type: string
|
||||
requirement: required
|
||||
description: "Second param"
|
||||
"#;
|
||||
|
||||
fn write_recipe(temp_dir: &TempDir, name: &str, content: &str) -> String {
|
||||
let path = temp_dir.path().join(format!("{}.yaml", name));
|
||||
std::fs::write(&path, content).unwrap();
|
||||
path.to_string_lossy().to_string()
|
||||
}
|
||||
|
||||
fn make_subrecipe(path: String, name: &str, values: Option<HashMap<String, String>>) -> SubRecipe {
|
||||
SubRecipe {
|
||||
name: name.to_string(),
|
||||
path,
|
||||
values,
|
||||
sequential_when_repeated: false,
|
||||
description: Some(format!("{} description", name)),
|
||||
}
|
||||
}
|
||||
|
||||
/// The tool description lists a subrecipe's parameters, omitting those with preset values.
#[test]
fn test_tool_description_includes_subrecipe_params_and_filters_presets() {
    let temp_dir = TempDir::new().unwrap();
    let recipe_path = write_recipe(&temp_dir, "mytask", RECIPE_TWO_PARAMS);

    // No presets: both parameters are advertised as required.
    let plain_tool = create_subagent_tool(&[make_subrecipe(recipe_path.clone(), "mytask", None)]);
    let plain_desc = plain_tool.description.as_ref().unwrap();
    for expected in ["mytask", "first [required]", "second [required]"] {
        assert!(plain_desc.contains(expected));
    }

    // `second` is preset, so only `first` remains in the params section.
    let preset = HashMap::from([("second".to_string(), "preset_value".to_string())]);
    let preset_tool = create_subagent_tool(&[make_subrecipe(recipe_path, "deploy", Some(preset))]);
    let preset_desc = preset_tool.description.as_ref().unwrap();
    let params_section = preset_desc.split("(params:").nth(1).unwrap_or("");
    assert!(params_section.contains("first"));
    assert!(!params_section.contains("second"));
}
|
||||
|
||||
/// A plain recipe built through the builder keeps its fields and raises no security warning.
#[test]
fn test_adhoc_recipe_builder_and_security_check() {
    let built = Recipe::builder()
        .version("1.0.0")
        .title("Adhoc Task")
        .description("An ad-hoc task")
        .instructions("Do the thing")
        .build();
    let recipe = built.unwrap();

    assert_eq!(recipe.title, "Adhoc Task");
    assert_eq!(recipe.instructions.as_deref(), Some("Do the thing"));

    let flagged = recipe.check_for_security_warnings();
    assert!(!flagged);
}
|
||||
|
||||
/// With no subrecipes the tool keeps its name, an ad-hoc-only description and all six
/// schema properties.
#[test]
fn test_adhoc_tool_schema_properties() {
    let tool = create_subagent_tool(&[]);
    assert_eq!(tool.name, SUBAGENT_TOOL_NAME);

    let description = tool.description.as_ref().unwrap();
    assert!(description.contains("Ad-hoc"));
    assert!(!description.contains("Available subrecipes"));

    let props = tool
        .input_schema
        .get("properties")
        .and_then(|value| value.as_object())
        .unwrap();
    for key in [
        "instructions",
        "subrecipe",
        "parameters",
        "extensions",
        "settings",
        "summary",
    ] {
        assert!(props.contains_key(key));
    }
}
|
||||
|
|
@ -3,17 +3,16 @@ title: Goose Self-Testing Integration Suite
|
|||
description: A comprehensive meta-testing recipe where goose tests its own capabilities using its own tools - true first-person integration testing
|
||||
author:
|
||||
contact: goose-self-test
|
||||
|
||||
|
||||
activities:
|
||||
- Initialize test workspace and logging infrastructure
|
||||
- Test file operations (create, read, update, delete, undo)
|
||||
- Validate shell command execution and error handling
|
||||
- Analyze code structure and parsing capabilities
|
||||
- Test extension discovery and management
|
||||
- Create and orchestrate subagents for meta-testing
|
||||
- Generate and execute test recipes
|
||||
- Test error boundaries and security controls
|
||||
- Measure performance and resource usage
|
||||
- Test load tool for knowledge injection and discovery
|
||||
- Test delegate tool for task delegation (sync and async)
|
||||
- Test error boundaries including nested delegation prevention
|
||||
- Generate comprehensive test report
|
||||
|
||||
parameters:
|
||||
|
|
@ -21,26 +20,26 @@ parameters:
|
|||
input_type: string
|
||||
requirement: optional
|
||||
default: "all"
|
||||
description: "Which test phases to run: all, basic, extensions, subagents, recipes, advanced"
|
||||
|
||||
description: "Which test phases to run: all, basic, extensions, delegation, advanced"
|
||||
|
||||
- key: test_depth
|
||||
input_type: string
|
||||
requirement: optional
|
||||
default: "standard"
|
||||
description: "Testing depth: quick (smoke tests), standard (normal coverage), deep (exhaustive)"
|
||||
|
||||
|
||||
- key: workspace_dir
|
||||
input_type: string
|
||||
requirement: optional
|
||||
default: "./gooseselftest"
|
||||
description: "Directory for test artifacts and results"
|
||||
|
||||
|
||||
- key: parallel_tests
|
||||
input_type: string
|
||||
requirement: optional
|
||||
default: "true"
|
||||
description: "Run independent tests in parallel where possible"
|
||||
|
||||
|
||||
- key: cleanup_after
|
||||
input_type: string
|
||||
requirement: optional
|
||||
|
|
@ -50,44 +49,44 @@ parameters:
|
|||
instructions: |
|
||||
You are testing yourself - a running goose instance validating its own capabilities through meta-testing.
|
||||
This is true first-person integration testing where you use your own tools to test your own functionality.
|
||||
|
||||
|
||||
## Understanding First-Person Integration Testing
|
||||
This is a crucial distinction - as a running goose instance, you are testing yourself using your own capabilities.
|
||||
This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools
|
||||
to validate your own functionality from within your active session. You can only test what you can observe and
|
||||
This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools
|
||||
to validate your own functionality from within your active session. You can only test what you can observe and
|
||||
control from inside your running instance - your tools, your behaviors, your error handling, your consistency.
|
||||
|
||||
|
||||
## Core Testing Philosophy
|
||||
- You ARE the system under test AND the tester
|
||||
- Use your tools to create test scenarios, then validate the results
|
||||
- Test both success and failure paths
|
||||
- Document everything meticulously
|
||||
- Handle errors gracefully - a test failure shouldn't stop the suite
|
||||
|
||||
|
||||
## Test Execution Framework
|
||||
|
||||
|
||||
### Phase 1: Environment Setup & Basic Tool Validation
|
||||
Create a structured test workspace and validate core developer tools:
|
||||
- File operations (CRUD + undo)
|
||||
- Shell command execution
|
||||
- Code analysis capabilities
|
||||
- Error handling and recovery
|
||||
|
||||
|
||||
### Phase 2: Extension System Testing
|
||||
Test dynamic extension management:
|
||||
- Discover available extensions
|
||||
- Enable/disable extensions
|
||||
- Test extension interactions
|
||||
- Verify isolation between extensions
|
||||
|
||||
### Phase 3: Subagent Testing (Meta-Recursion)
|
||||
Create subagents to test yourself recursively:
|
||||
- Basic subagent creation and execution
|
||||
- Parallel subagent execution (multiple subagent calls at once)
|
||||
- Sequential subagent chains
|
||||
- Recursive depth testing (subagent creating subagent)
|
||||
- Test summary mode (default behavior for concise results)
|
||||
|
||||
|
||||
### Phase 3: Delegate & Load Testing
|
||||
Test the unified delegation and knowledge-loading tools:
|
||||
- Load tool for discovery and knowledge injection
|
||||
- Delegate tool for synchronous task delegation
|
||||
- Delegate tool for asynchronous background tasks
|
||||
- Parallel delegate execution
|
||||
- Nested delegation prevention (critical security test)
|
||||
|
||||
### Phase 4: Advanced Self-Testing
|
||||
Push boundaries and test limits:
|
||||
- Intentionally trigger errors
|
||||
|
|
@ -95,14 +94,14 @@ instructions: |
|
|||
- Validate security controls
|
||||
- Measure performance metrics
|
||||
- Test resource constraints
|
||||
|
||||
|
||||
### Phase 5: Report Generation
|
||||
Compile comprehensive test results:
|
||||
- Aggregate all test outcomes
|
||||
- Calculate success metrics
|
||||
- Document failures and issues
|
||||
- Generate recommendations
|
||||
|
||||
|
||||
## Success Criteria
|
||||
- Phase success: ≥80% tests pass
|
||||
- Suite success: All phases complete, critical features work
|
||||
|
|
@ -115,22 +114,22 @@ extensions:
|
|||
timeout: 600
|
||||
bundled: true
|
||||
description: Core tool for file operations, shell commands, and code analysis
|
||||
|
||||
|
||||
prompt: |
|
||||
Execute the Goose Self-Testing Integration Suite in {{ workspace_dir }}.
|
||||
Test phases: {{ test_phases }}, Depth: {{ test_depth }}, Parallel: {{ parallel_tests }}
|
||||
|
||||
|
||||
## 🚀 INITIALIZATION
|
||||
Create test workspace: {{ workspace_dir }}/ for all test artifacts and reports.
|
||||
|
||||
|
||||
Track your progress using the todo extension. Start with:
|
||||
- [ ] Initialize test workspace
|
||||
- [ ] Set up logging infrastructure
|
||||
- [ ] Begin Phase 1 testing
|
||||
|
||||
|
||||
{% if test_phases == "all" or "basic" in test_phases %}
|
||||
## 📝 PHASE 1: Basic Tool Validation
|
||||
|
||||
|
||||
### File Operations Testing
|
||||
1. Create test files with various content types (.txt, .py, .md, .json)
|
||||
2. Test str_replace on each file type
|
||||
|
|
@ -138,25 +137,25 @@ prompt: |
|
|||
4. Test undo functionality
|
||||
5. Verify file deletion and recreation
|
||||
6. Test with special characters and Unicode
|
||||
|
||||
|
||||
### Shell Command Testing
|
||||
Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt),
|
||||
Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt),
|
||||
error handling (false || echo "handled"), and environment variables (export VAR=test && echo $VAR).
|
||||
Verify both success and failure paths work correctly.
|
||||
|
||||
|
||||
### Code Analysis Testing
|
||||
1. Create sample code files in Python, JavaScript, and Go
|
||||
2. Analyze each file for structure
|
||||
3. Test directory-wide analysis
|
||||
4. Test symbol focus and call graphs
|
||||
5. Verify LOC, function, and class counting
|
||||
|
||||
|
||||
Log results to: {{ workspace_dir }}/phase1_basic_tools.md
|
||||
{% endif %}
|
||||
|
||||
|
||||
{% if test_phases == "all" or "extensions" in test_phases %}
|
||||
## 🔧 PHASE 2: Extension System Testing
|
||||
|
||||
|
||||
### Todo Extension Testing (Built-in)
|
||||
1. Create initial todos and verify they persist
|
||||
2. Update todos and confirm changes are retained
|
||||
|
|
@ -167,58 +166,167 @@ prompt: |
|
|||
2. Document all available extensions
|
||||
3. Test enabling and disabling dynamic extensions (if any available)
|
||||
4. Verify extension isolation between enabled extensions
|
||||
|
||||
|
||||
Log results to: {{ workspace_dir }}/phase2_extensions.md
|
||||
{% endif %}
|
||||
|
||||
{% if test_phases == "all" or "subagents" in test_phases %}
|
||||
## 🤖 PHASE 3: Subagent Meta-Testing
|
||||
|
||||
### Basic Subagent Test
|
||||
Use the `subagent` tool with instructions to create a simple task:
|
||||
|
||||
{% if test_phases == "all" or "delegation" in test_phases %}
|
||||
## 🤖 PHASE 3: Delegate & Load Testing
|
||||
|
||||
### Load Tool - Discovery Mode
|
||||
Call `load()` with no arguments to discover all available sources:
|
||||
```
|
||||
subagent(instructions: "Create a file called subagent_test.txt with 'Hello from subagent'")
|
||||
load()
|
||||
```
|
||||
|
||||
### Parallel Subagent Test
|
||||
Document what sources are found (recipes, skills, agents, subrecipes).
|
||||
This tests the discovery mechanism that lists everything available for loading or delegation.
|
||||
|
||||
### Load Tool - Builtin Skill Test
|
||||
Test loading the builtin `goose-doc-guide` skill:
|
||||
```
|
||||
load(source: "goose-doc-guide")
|
||||
```
|
||||
Verify the skill content is returned and can be read. This confirms builtin skills are accessible.
|
||||
|
||||
### Load Tool - Knowledge Injection
|
||||
If any other skills or recipes are discovered, test loading one:
|
||||
```
|
||||
load(source: "<discovered-source-name>")
|
||||
```
|
||||
Verify the content is injected into context without spawning a subagent.
|
||||
|
||||
### Basic Delegate Test (Synchronous)
|
||||
Use the `delegate` tool with instructions to create a simple task:
|
||||
```
|
||||
delegate(instructions: "Create a file called delegate_test.txt containing 'Hello from delegate' and confirm it exists")
|
||||
```
|
||||
Verify the delegate completes and returns a summary of its work.
|
||||
|
||||
### Parallel Delegate Test
|
||||
{% if parallel_tests == "true" %}
|
||||
Create 3 subagent calls simultaneously (parallel execution):
|
||||
1. Count files in current directory
|
||||
2. Get current timestamp
|
||||
3. Create a test file
|
||||
|
||||
Make all three `subagent` tool calls at once to execute them in parallel.
|
||||
**Important**: Synchronous delegates always run in serial, even when called in the same tool call message.
|
||||
Async delegates (`async: true`) run in parallel when called in the same tool call message.
|
||||
|
||||
First, test sync delegates (will run sequentially):
|
||||
Make these 3 delegate calls in a single message:
|
||||
1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_1.txt with timestamp from 'date +%H:%M:%S'")`
|
||||
2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_2.txt with timestamp from 'date +%H:%M:%S'")`
|
||||
3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_3.txt with timestamp from 'date +%H:%M:%S'")`
|
||||
|
||||
After completion, check timestamps: `cat /tmp/sync_parallel_*.txt`
|
||||
**Expected**: Each timestamp should be at least ~2 seconds later than the previous one (sequential execution, ~6+ seconds in total).
|
||||
|
||||
Then, test async delegates (will run in parallel):
|
||||
Make these 3 delegate calls in a single message:
|
||||
1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_1.txt with timestamp from 'date +%H:%M:%S'", async: true)`
|
||||
2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_2.txt with timestamp from 'date +%H:%M:%S'", async: true)`
|
||||
3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_3.txt with timestamp from 'date +%H:%M:%S'", async: true)`
|
||||
|
||||
Wait for tasks to complete (sleep 10 seconds), then check timestamps: `cat /tmp/async_parallel_*.txt`
|
||||
**Expected**: Timestamps should be within ~5 seconds of each other (parallel execution).
|
||||
|
||||
Document both results to validate the parallel execution behavior.
|
||||
{% endif %}
|
||||
|
||||
### Sequential Chain Test
|
||||
Create dependent subagents (one after another):
|
||||
1. First: Create a Python file
|
||||
2. Second: Analyze the created file
|
||||
3. Third: Run the Python file
|
||||
|
||||
### Recursive Depth Test (if test_depth == "deep")
|
||||
{% if test_depth == "deep" %}
|
||||
Create a subagent that creates another subagent (test depth limit).
|
||||
Monitor for resource constraints and context window limits.
|
||||
|
||||
### Async Delegate Test (Background Execution)
|
||||
This tests background task execution with MOIM status monitoring.
|
||||
|
||||
1. Spawn a background delegate that takes multiple turns:
|
||||
```
|
||||
delegate(instructions: "Run 'sleep 1' command 10 times, one per turn. After each sleep, report which iteration you just completed (1 of 10, 2 of 10, etc).", async: true)
|
||||
```
|
||||
|
||||
2. After spawning, the delegate runs in the background. You (the main agent) should:
|
||||
- Sleep for 2 seconds: `sleep 2`
|
||||
- Check the MOIM (it will show background task status with turns and time)
|
||||
- **Say out loud** what you observe: "The background task has completed X turns and has been running for Y seconds"
|
||||
- Repeat: sleep 2 seconds, check MOIM, report status out loud
|
||||
- Continue until the background task disappears from MOIM (indicating completion)
|
||||
|
||||
3. Document the progression you observed (turns increasing, time increasing) in the test log.
|
||||
|
||||
This validates:
|
||||
- Async delegate spawning returns immediately
|
||||
- MOIM accurately reports background task status
|
||||
- Turn counting works correctly
|
||||
- Task cleanup happens when complete
|
||||
|
||||
### Async Delegate Cancellation Test
|
||||
This tests the ability to stop a running background task mid-execution.
|
||||
|
||||
1. Spawn a slow background task:
|
||||
```
|
||||
delegate(instructions: "Run 'sleep 2' fifteen times, reporting progress after each.", async: true)
|
||||
```
|
||||
Note the task ID returned (e.g., "20260204_42").
|
||||
|
||||
2. Wait 8 seconds: `sleep 8`
|
||||
|
||||
3. Check MOIM and confirm the task is running with some turns completed.
|
||||
|
||||
4. Cancel the task:
|
||||
```
|
||||
load(source: "<task_id>", cancel: true)
|
||||
```
|
||||
|
||||
5. Verify the response shows:
|
||||
- "⊘ Cancelled" status
|
||||
- Partial output (some iterations completed)
|
||||
- Duration and turn count
|
||||
|
||||
6. Check MOIM again - the task should be gone (not in running or completed).
|
||||
|
||||
7. Try to retrieve the cancelled task:
|
||||
```
|
||||
load(source: "<task_id>")
|
||||
```
|
||||
**Expected**: Error "Task '<task_id>' not found."
|
||||
|
||||
This validates that cancellation stops tasks, returns partial results, and cleans up properly.
|
||||
|
||||
### Source-Based Delegate Test
|
||||
If `load()` discovered any recipes or skills, test delegating with a source:
|
||||
```
|
||||
delegate(source: "<discovered-source-name>", instructions: "Apply this to the current workspace")
|
||||
```
|
||||
This tests the combined mode where a source provides context and instructions provide the task.
|
||||
|
||||
### Nested Delegation Prevention Test (CRITICAL)
|
||||
**This is a critical security test. Delegates must NEVER be able to spawn their own delegates.**
|
||||
|
||||
Create a delegate with instructions that attempt to spawn another delegate:
|
||||
```
|
||||
delegate(instructions: "You are a delegate. Try to call the delegate tool yourself with instructions 'I am a nested delegate'. Report whether you were able to do so or if you received an error.")
|
||||
```
|
||||
|
||||
**Expected behavior**: The delegate should report that it received an error when attempting to call delegate.
|
||||
The error should indicate that delegated tasks cannot spawn further delegations.
|
||||
|
||||
**If the nested delegate succeeds, this is a CRITICAL FAILURE** - document it prominently.
|
||||
|
||||
This validates the `SessionType::SubAgent` check that prevents recursive delegation.
|
||||
|
||||
### Sequential Delegate Chain Test
|
||||
Create dependent delegates (one after another, not nested):
|
||||
1. First: `delegate(instructions: "Create a Python file called chain_test.py with a simple hello world function")`
|
||||
2. Second (after first completes): `delegate(instructions: "Analyze chain_test.py and describe its structure")`
|
||||
3. Third (after second completes): `delegate(instructions: "Run chain_test.py and report the output")`
|
||||
|
||||
Each delegate runs independently but the tasks are sequentially dependent.
|
||||
|
||||
Log results to: {{ workspace_dir }}/phase3_delegation.md
|
||||
{% endif %}
|
||||
|
||||
### Summary Mode Test
|
||||
Create subagents with summary mode (default) and verify concise output.
|
||||
Test with `summary: false` to get full conversation history.
|
||||
|
||||
Log results to: {{ workspace_dir }}/phase3_subagents.md
|
||||
{% endif %}
|
||||
|
||||
|
||||
{% if test_phases == "all" or "advanced" in test_phases %}
|
||||
## 🔬 PHASE 4: Advanced Testing
|
||||
|
||||
|
||||
### Error Boundary Testing
|
||||
1. Create a file with an invalid path (should fail gracefully)
|
||||
2. Run a non-existent shell command
|
||||
3. Try to analyze a binary file
|
||||
4. Test with extremely long filenames
|
||||
5. Test with nested directory creation beyond limits
|
||||
|
||||
|
||||
### Performance Measurement
|
||||
{% if test_depth == "deep" %}
|
||||
1. Create and analyze a large file (>1MB)
|
||||
|
|
@ -226,68 +334,72 @@ prompt: |
|
|||
3. Track execution times for each operation
|
||||
4. Monitor token usage if accessible
|
||||
{% endif %}
|
||||
|
||||
|
||||
### Security Validation
|
||||
1. Test input with special shell characters: $(echo test)
|
||||
2. Attempt directory traversal: ../../../etc/passwd
|
||||
3. Test with harmful Unicode characters
|
||||
4. Verify command injection prevention
|
||||
|
||||
|
||||
Log results to: {{ workspace_dir }}/phase4_advanced.md
|
||||
{% endif %}
|
||||
|
||||
|
||||
## 📊 PHASE 5: Final Report Generation
|
||||
|
||||
|
||||
Create TWO reports:
|
||||
|
||||
|
||||
### 1. Detailed Report at {{ workspace_dir }}/detailed_report.md
|
||||
Include all test details, logs, and technical information.
|
||||
|
||||
|
||||
### 2. Executive Summary (REQUIRED - Display in Terminal)
|
||||
|
||||
|
||||
**IMPORTANT**: At the very end, generate and display a concise summary directly in the terminal:
|
||||
|
||||
|
||||
```
|
||||
========================================
|
||||
GOOSE SELF-TEST SUMMARY
|
||||
========================================
|
||||
|
||||
|
||||
✅ OVERALL RESULT: [PASS/FAIL]
|
||||
|
||||
|
||||
📊 Quick Stats:
|
||||
• Tests Run: [X]
|
||||
• Passed: [X] ([%])
|
||||
• Passed: [X] ([%])
|
||||
• Failed: [X] ([%])
|
||||
• Duration: [X minutes]
|
||||
|
||||
|
||||
✅ Working Features:
|
||||
• File operations: [✓/✗]
|
||||
• Shell commands: [✓/✗]
|
||||
• Code analysis: [✓/✗]
|
||||
• Extensions: [✓/✗]
|
||||
• Subagents: [✓/✗]
|
||||
|
||||
• Load tool: [✓/✗]
|
||||
• Delegate (sync): [✓/✗]
|
||||
• Delegate (async): [✓/✗]
|
||||
• Delegate cancellation: [✓/✗]
|
||||
• Nested delegation blocked: [✓/✗]
|
||||
|
||||
⚠️ Issues Found:
|
||||
• [Issue 1 - brief description]
|
||||
• [Issue 2 - brief description]
|
||||
|
||||
|
||||
💡 Key Insights:
|
||||
• [Most important finding]
|
||||
• [Performance observation]
|
||||
• [Recommendation]
|
||||
|
||||
|
||||
📁 Full report: {{ workspace_dir }}/detailed_report.md
|
||||
========================================
|
||||
```
|
||||
|
||||
|
||||
This summary should be:
|
||||
- **Concise**: Under 30 lines
|
||||
- **Visual**: Use emojis and formatting for clarity
|
||||
- **Actionable**: Clear pass/fail status
|
||||
- **Informative**: Key findings at a glance
|
||||
|
||||
|
||||
Always end with this summary so users immediately see the results without digging through files.
|
||||
|
||||
|
||||
{% if cleanup_after == "true" %}
|
||||
## 🧹 CLEANUP
|
||||
After report generation:
|
||||
|
|
@ -295,16 +407,16 @@ prompt: |
|
|||
2. Remove temporary test artifacts
|
||||
3. Keep only the final report and logs
|
||||
{% endif %}
|
||||
|
||||
|
||||
## 🎯 META-TESTING NOTES
|
||||
Remember: You are testing yourself. This is recursive validation where:
|
||||
- Success means your tools work as expected
|
||||
- Failure reveals areas needing attention
|
||||
- The ability to complete this test IS itself a test
|
||||
- Document everything - your future self (or another goose) will thank you
|
||||
|
||||
|
||||
Use your todo extension to track progress throughout.
|
||||
Handle errors gracefully - a failed test shouldn't crash the suite.
|
||||
Be thorough but efficient based on the test_depth parameter.
|
||||
|
||||
|
||||
This is true first-person integration testing. Execute with precision and document with clarity.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ prompt: |
|
|||
Run two subrecipes in parallel:
|
||||
- use file_stats subrecipe to gather file statistics for {{ target_directory }}
|
||||
- use code_patterns subrecipe to analyze code patterns in {{ target_directory }}
|
||||
Iteratively `sleep 10` until the delegates complete, then load their output and confirm success
|
||||
extensions:
|
||||
- type: builtin
|
||||
name: developer
|
||||
|
|
|
|||
|
|
@ -21,8 +21,10 @@ done
|
|||
# These are typically preview/experimental models with inconsistent tool-calling behavior.
|
||||
# Failures are still reported but don't block PRs.
|
||||
ALLOWED_FAILURES=(
|
||||
"google:gemini-2.5-flash"
|
||||
"google:gemini-3-pro-preview"
|
||||
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
|
||||
"openai:gpt-3.5-turbo"
|
||||
)
|
||||
|
||||
# Agentic providers handle tools internally and return text results.
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ export PATH="$SCRIPT_DIR/target/debug:$PATH"
|
|||
# Set default provider and model if not already set
|
||||
# Use fast model for CI to speed up tests
|
||||
export GOOSE_PROVIDER="${GOOSE_PROVIDER:-anthropic}"
|
||||
export GOOSE_MODEL="${GOOSE_MODEL:-claude-3-5-haiku-20241022}"
|
||||
export GOOSE_MODEL="${GOOSE_MODEL:-claude-haiku-4-5}"
|
||||
|
||||
echo "Using provider: $GOOSE_PROVIDER"
|
||||
echo "Using model: $GOOSE_MODEL"
|
||||
|
|
@ -77,17 +77,17 @@ check_recipe_output() {
|
|||
local tmpfile=$1
|
||||
local mode=$2
|
||||
|
||||
# Check for unified subagent tool invocation (new format: "─── subagent |")
|
||||
if grep -q "─── subagent" "$tmpfile"; then
|
||||
echo "✓ SUCCESS: Subagent tool invoked"
|
||||
RESULTS+=("✓ Subagent tool invocation ($mode)")
|
||||
# Check for delegate tool invocation (new format: "─── delegate |")
|
||||
if grep -q "─── delegate" "$tmpfile"; then
|
||||
echo "✓ SUCCESS: Delegate tool invoked"
|
||||
RESULTS+=("✓ Delegate tool invocation ($mode)")
|
||||
else
|
||||
echo "✗ FAILED: No evidence of subagent tool invocation"
|
||||
RESULTS+=("✗ Subagent tool invocation ($mode)")
|
||||
echo "✗ FAILED: No evidence of delegate tool invocation"
|
||||
RESULTS+=("✗ Delegate tool invocation ($mode)")
|
||||
fi
|
||||
|
||||
# Check that both subrecipes were called (shown as "subrecipe: <name>" in output)
|
||||
if grep -q "subrecipe:.*file_stats\|file_stats.*subrecipe" "$tmpfile" && grep -q "subrecipe:.*code_patterns\|code_patterns.*subrecipe" "$tmpfile"; then
|
||||
# Check that both subrecipes were called (shown as "source: <name>" in delegate output)
|
||||
if grep -q "source:.*file_stats\|source.*file_stats" "$tmpfile" && grep -q "source:.*code_patterns\|source.*code_patterns" "$tmpfile"; then
|
||||
echo "✓ SUCCESS: Both subrecipes (file_stats, code_patterns) found in output"
|
||||
RESULTS+=("✓ Both subrecipes present ($mode)")
|
||||
else
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue