fix: add strict:false to Responses API tools and gpt-5.4 to known models (#8636)

Signed-off-by: Bradley Axen <baxen@squareup.com>
Bradley Axen 2026-04-21 15:32:24 -07:00 committed by GitHub
parent 7325fbdae3
commit ab0c40d26a
11 changed files with 474 additions and 130 deletions
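For reviewers skimming the diff: a minimal sketch of the per-tool entry that create_responses_request produces after this change, with the explicit "strict": false this commit adds. This assumes serde_json, and the "type": "function" wrapper is an assumption (that line sits outside the hunk shown below); the other field values mirror the new test_responses_tools_include_strict_false test.

// Illustrative sketch only, not part of the diff.
use serde_json::json;

fn main() {
    let tool_entry = json!({
        "type": "function",               // assumed wrapper; not visible in the hunk below
        "name": "shell",
        "description": "Execute a shell command",
        "parameters": {
            "type": "object",
            "properties": {
                "command": { "type": "string", "description": "The command to run" }
            },
            "required": ["command"]
        },
        // Without this, the Responses API applies strict schema validation to
        // function tools and rejects MCP tool schemas that are not
        // strict-mode compatible.
        "strict": false
    });
    println!("{}", serde_json::to_string_pretty(&tool_entry).unwrap());
}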

View file

@ -1,7 +1,12 @@
use std::sync::{Arc, Mutex};
pub const TEST_SESSION_ID: &str = "test-session-id";
pub const TEST_MODEL: &str = "gpt-5-nano";
// Use a Chat Completions model so the canned SSE fixtures (which return
// Chat Completions format) are parsed correctly. gpt-5-nano now routes to
// the Responses API which needs a different mock format.
// TODO: add a Responses API mock to OpenAiFixture so tests can cover
// responses-routed models like gpt-5-nano end-to-end.
pub const TEST_MODEL: &str = "gpt-4.1";
const NOT_YET_SET: &str = "session-id-not-yet-set";
pub(crate) const SESSION_ID_HEADER: &str = "agent-session-id";

View file

@ -138,9 +138,22 @@ impl ModelConfig {
}
}
if let Some(canonical) =
// Try canonical lookup with the full model name first, then fall back
// to the name with reasoning-effort suffixes stripped (e.g.
// "databricks-gpt-5.4-high" → "databricks-gpt-5.4").
let canonical =
crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
{
.or_else(|| {
let (base, _effort) =
crate::providers::utils::extract_reasoning_effort(&self.model_name);
if base != self.model_name {
crate::providers::canonical::maybe_get_canonical_model(provider_name, &base)
} else {
None
}
});
if let Some(canonical) = canonical {
if self.context_limit.is_none() {
self.context_limit = Some(canonical.limit.context);
}
@ -299,15 +312,7 @@ impl ModelConfig {
}
pub fn is_openai_reasoning_model(&self) -> bool {
const DATABRICKS_MODEL_NAME_PREFIXES: &[&str] = &["goose-", "databricks-"];
const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
let base = DATABRICKS_MODEL_NAME_PREFIXES
.iter()
.find_map(|p| self.model_name.strip_prefix(p))
.unwrap_or(&self.model_name);
REASONING_PREFIXES.iter().any(|p| base.starts_with(p))
crate::providers::utils::is_openai_responses_model(&self.model_name)
}
pub fn max_output_tokens(&self) -> i32 {
@ -499,6 +504,28 @@ mod tests {
assert_eq!(config.max_tokens, None);
assert_eq!(config.reasoning, None);
}
#[test]
fn resolves_after_stripping_reasoning_effort_suffix() {
let _guard = env_lock::lock_env([
("GOOSE_MAX_TOKENS", None::<&str>),
("GOOSE_CONTEXT_LIMIT", None::<&str>),
]);
// "databricks-gpt-5.4-high" should resolve via "databricks-gpt-5.4"
let config = ModelConfig::new_or_fail("databricks-gpt-5.4-high")
.with_canonical_limits("databricks");
assert_eq!(config.context_limit, Some(1_050_000));
// "gpt-5.4-xhigh" should resolve via "gpt-5.4"
let config = ModelConfig::new_or_fail("gpt-5.4-xhigh").with_canonical_limits("openai");
assert_eq!(config.context_limit, Some(1_050_000));
// "gpt-5.4-nano-low" should resolve via "gpt-5.4-nano"
let config =
ModelConfig::new_or_fail("gpt-5.4-nano-low").with_canonical_limits("openai");
assert_eq!(config.context_limit, Some(400_000));
}
}
mod is_openai_reasoning_model {

View file

@ -269,17 +269,19 @@ impl DatabricksProvider {
}
fn is_responses_model(model_name: &str) -> bool {
let normalized = model_name.to_ascii_lowercase();
normalized.contains("codex")
super::utils::is_openai_responses_model(model_name)
}
fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
if is_embedding {
"serving-endpoints/text-embedding-3-small/invocations".to_string()
} else if Self::is_responses_model(model_name) {
"serving-endpoints/responses".to_string()
} else {
format!("serving-endpoints/{}/invocations", model_name)
let (clean_name, _) = super::utils::extract_reasoning_effort(model_name);
if Self::is_responses_model(&clean_name) {
"serving-endpoints/responses".to_string()
} else {
format!("serving-endpoints/{}/invocations", clean_name)
}
}
}
@ -594,3 +596,48 @@ impl EmbeddingCapable for DatabricksProvider {
Ok(embeddings)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn test_provider() -> DatabricksProvider {
DatabricksProvider {
api_client: super::super::api_client::ApiClient::new(
"https://example.com".to_string(),
super::super::api_client::AuthMethod::NoAuth,
)
.unwrap(),
auth: DatabricksAuth::Token("fake".into()),
model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
image_format: ImageFormat::OpenAi,
retry_config: RetryConfig::default(),
fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
name: "databricks".into(),
token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
instance_id: None,
}
}
#[test]
fn responses_models_route_to_responses_endpoint() {
let provider = test_provider();
for (model_name, expected_path) in [
("gpt-5.4", "serving-endpoints/responses"),
("databricks-gpt-5.4-high", "serving-endpoints/responses"),
("databricks-gpt-5-4-xhigh", "serving-endpoints/responses"),
("o3-mini", "serving-endpoints/responses"),
(
"databricks-claude-sonnet-4",
"serving-endpoints/databricks-claude-sonnet-4/invocations",
),
] {
assert_eq!(
provider.get_endpoint_path(model_name, false),
expected_path,
"unexpected endpoint for {model_name}"
);
}
}
}

View file

@ -2,8 +2,9 @@ use crate::conversation::message::{Message, MessageContent};
use crate::model::ModelConfig;
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
use crate::providers::utils::{
convert_image, detect_image_path, is_valid_function_name, load_image_file, safely_parse_json,
sanitize_function_name, ImageFormat,
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
ImageFormat,
};
use anyhow::{anyhow, Error};
use rmcp::model::{
@ -581,24 +582,8 @@ pub fn create_request(
));
}
let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
let (model_name, reasoning_effort) = if is_openai_reasoning_model {
let parts: Vec<&str> = model_config.model_name.split('-').collect();
let last_part = parts.last().unwrap();
match *last_part {
"low" | "medium" | "high" => {
let base_name = parts[..parts.len() - 1].join("-");
(base_name, Some(last_part.to_string()))
}
_ => (
model_config.model_name.to_string(),
Some("medium".to_string()),
),
}
} else {
(model_config.model_name.to_string(), None)
};
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let is_openai_reasoning_model = is_openai_responses_model(&model_name);
let system_message = DatabricksMessage {
role: "system".to_string(),
@ -1073,6 +1058,63 @@ mod tests {
Ok(())
}
#[test]
fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
let model_config = ModelConfig {
model_name: "o3-xhigh".to_string(),
context_limit: Some(4096),
temperature: None,
max_tokens: Some(1024),
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
reasoning: None,
};
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
assert_eq!(request["model"], "o3");
assert_eq!(request["reasoning_effort"], "xhigh");
Ok(())
}
#[test]
fn test_create_request_reasoning_effort_none() -> anyhow::Result<()> {
let model_config = ModelConfig {
model_name: "o3-none".to_string(),
context_limit: Some(4096),
temperature: None,
max_tokens: Some(1024),
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
reasoning: None,
};
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
assert_eq!(request["model"], "o3");
assert_eq!(request["reasoning_effort"], "none");
Ok(())
}
#[test]
fn test_create_request_reasoning_effort_for_prefixed_gpt5_model() -> anyhow::Result<()> {
let model_config = ModelConfig {
model_name: "databricks-gpt-5.4-high".to_string(),
context_limit: Some(4096),
temperature: None,
max_tokens: Some(1024),
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
reasoning: None,
};
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
assert_eq!(request["model"], "databricks-gpt-5.4");
assert_eq!(request["reasoning_effort"], "high");
Ok(())
}
#[test]
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
let _guard = env_lock::lock_env([

View file

@ -4,8 +4,9 @@ use crate::model::ModelConfig;
use crate::providers::base::{ProviderUsage, Usage};
use crate::providers::errors::ProviderError;
use crate::providers::utils::{
convert_image, detect_image_path, extract_reasoning_effort, is_valid_function_name,
load_image_file, safely_parse_json, sanitize_function_name, ImageFormat,
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
ImageFormat,
};
use anyhow::{anyhow, Error};
use async_stream::try_stream;
@ -984,7 +985,7 @@ pub fn create_request(
}
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let is_reasoning_model = reasoning_effort.is_some();
let is_reasoning_model = is_openai_responses_model(&model_name);
let system_message = json!({
"role": if is_reasoning_model { "developer" } else { "system" },
@ -1716,7 +1717,8 @@ mod tests {
#[test]
fn test_create_request_o1_default() -> anyhow::Result<()> {
// Test default medium reasoning effort for O1 model
// Without an explicit effort suffix the API picks its own default;
// we should omit reasoning_effort entirely but still use "developer" role.
let model_config = ModelConfig {
model_name: "o1".to_string(),
context_limit: Some(4096),
@ -1745,13 +1747,16 @@ mod tests {
"content": "system"
}
],
"reasoning_effort": "medium",
"max_completion_tokens": 1024
});
for (key, value) in expected.as_object().unwrap() {
assert_eq!(obj.get(key).unwrap(), value);
}
assert!(
obj.get("reasoning_effort").is_none(),
"reasoning_effort should be omitted when no explicit suffix is provided"
);
Ok(())
}

View file

@ -2,7 +2,7 @@ use crate::conversation::message::{Message, MessageContent};
use crate::mcp_utils::extract_text_from_resource;
use crate::model::ModelConfig;
use crate::providers::base::{ProviderUsage, Usage};
use crate::providers::utils::extract_reasoning_effort;
use crate::providers::utils::{extract_reasoning_effort, is_openai_responses_model};
use anyhow::{anyhow, Error};
use async_stream::try_stream;
use chrono;
@ -468,7 +468,10 @@ pub fn create_responses_request(
add_message_items(&mut input_items, messages);
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let is_reasoning_model = reasoning_effort.is_some();
// All models routed here are responses-capable; temperature is rejected
// by the API for reasoning models regardless of whether an explicit
// effort suffix was provided.
let is_reasoning_model = is_openai_responses_model(&model_name);
let mut payload = json!({
"model": model_name,
@ -495,6 +498,7 @@ pub fn create_responses_request(
"name": tool.name,
"description": tool.description,
"parameters": tool.input_schema,
"strict": false,
})
})
.collect();
@ -1083,4 +1087,103 @@ mod tests {
assert_eq!(info.effort.as_deref(), Some("high"));
assert_eq!(info.summary.as_deref(), Some("Thought deeply"));
}
#[test]
fn test_responses_tools_include_strict_false() {
let model_config = ModelConfig {
model_name: "gpt-5.4".to_string(),
context_limit: None,
temperature: None,
max_tokens: None,
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
reasoning: None,
};
let tool = Tool::new(
"shell",
"Execute a shell command",
object!({
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "The command to run"
}
},
"required": ["command"]
}),
);
let result =
create_responses_request(&model_config, "You are helpful.", &[], &[tool]).unwrap();
let tools = result["tools"]
.as_array()
.expect("tools should be an array");
assert_eq!(tools.len(), 1);
assert_eq!(tools[0]["strict"], json!(false),
"Responses API defaults strict to true, but MCP tool schemas are not strict-compatible; must explicitly set strict: false");
}
#[test]
fn test_responses_request_with_explicit_effort_suffix() {
for (model_name, expected_model, expected_effort) in [
("gpt-5.4-xhigh", "gpt-5.4", "xhigh"),
("databricks-gpt-5.4-high", "databricks-gpt-5.4", "high"),
("databricks-o3-none", "databricks-o3", "none"),
] {
let model_config = ModelConfig {
model_name: model_name.to_string(),
context_limit: None,
temperature: None,
max_tokens: None,
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
reasoning: None,
};
let result =
create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap();
assert_eq!(
result["model"], expected_model,
"unexpected model for {model_name}"
);
assert_eq!(
result["reasoning"]["effort"], expected_effort,
"unexpected effort for {model_name}"
);
assert_eq!(result["reasoning"]["summary"], "auto");
}
}
#[test]
fn test_responses_request_without_effort_suffix_omits_reasoning() {
for model_name in ["gpt-5.4", "o3", "gpt-5-nano"] {
let model_config = ModelConfig {
model_name: model_name.to_string(),
context_limit: None,
temperature: None,
max_tokens: None,
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
reasoning: None,
};
let result =
create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap();
assert_eq!(result["model"], model_name, "model should be unchanged");
assert!(
result.get("reasoning").is_none(),
"reasoning should be omitted for {model_name} without explicit effort suffix"
);
}
}
}

View file

@ -48,9 +48,21 @@ pub const OPEN_AI_KNOWN_MODELS: &[(&str, usize)] = &[
("gpt-3.5-turbo", 16_385),
("gpt-4-turbo", 128_000),
("o4-mini", 128_000),
("gpt-5", 400_000),
("gpt-5-mini", 400_000),
("gpt-5-nano", 400_000),
("gpt-5.1-codex", 400_000),
("gpt-5-pro", 400_000),
("gpt-5-codex", 400_000),
("gpt-5.1", 400_000),
("gpt-5.1-codex", 400_000),
("gpt-5.2", 400_000),
("gpt-5.2-codex", 400_000),
("gpt-5.2-pro", 400_000),
("gpt-5.3-codex", 400_000),
("gpt-5.4", 1_050_000),
("gpt-5.4-mini", 400_000),
("gpt-5.4-nano", 400_000),
("gpt-5.4-pro", 1_050_000),
];
pub const OPEN_AI_DOC_URL: &str = "https://platform.openai.com/docs/models";
@ -283,10 +295,7 @@ impl OpenAiProvider {
}
fn is_responses_model(model_name: &str) -> bool {
let normalized_model = model_name.to_ascii_lowercase();
(normalized_model.starts_with("gpt-5") && normalized_model.contains("codex"))
|| normalized_model.starts_with("gpt-5.2-pro")
|| normalized_model.starts_with("gpt-5.4")
super::utils::is_openai_responses_model(model_name)
}
fn should_use_responses_api(model_name: &str, base_path: &str) -> bool {
@ -834,59 +843,20 @@ mod tests {
}
#[test]
fn gpt_5_2_codex_uses_responses_when_base_path_is_default() {
assert!(OpenAiProvider::should_use_responses_api(
"gpt-5.2-codex",
"v1/chat/completions"
));
}
#[test]
fn gpt_5_2_pro_uses_responses_when_base_path_is_default() {
assert!(OpenAiProvider::should_use_responses_api(
"gpt-5.2-pro",
"v1/chat/completions"
));
}
#[test]
fn gpt_5_2_pro_with_date_uses_responses() {
assert!(OpenAiProvider::should_use_responses_api(
"gpt-5.2-pro-2025-12-11",
"v1/chat/completions"
));
}
#[test]
fn explicit_chat_path_forces_chat_completions() {
assert!(!OpenAiProvider::should_use_responses_api(
"gpt-5.2-codex",
"openai/v1/chat/completions"
));
}
#[test]
fn gpt_5_4_uses_responses_when_base_path_is_default() {
assert!(OpenAiProvider::should_use_responses_api(
"gpt-5.4",
"v1/chat/completions"
));
}
#[test]
fn gpt_5_4_with_date_uses_responses() {
assert!(OpenAiProvider::should_use_responses_api(
"gpt-5.4-2026-03-01",
"v1/chat/completions"
));
}
#[test]
fn gpt_4o_does_not_use_responses() {
assert!(!OpenAiProvider::should_use_responses_api(
"gpt-4o",
"v1/chat/completions"
));
fn responses_api_routing_uses_model_family_unless_path_forces_chat() {
for (model_name, base_path, expected) in [
("gpt-5.4", "v1/chat/completions", true),
("gpt-5.4-xhigh", "v1/chat/completions", true),
("gpt-5.2-pro-2025-12-11", "v1/chat/completions", true),
("gpt-4o", "v1/chat/completions", false),
("gpt-5.2-codex", "openai/v1/chat/completions", false),
] {
assert_eq!(
OpenAiProvider::should_use_responses_api(model_name, base_path),
expected,
"unexpected routing for {model_name} via {base_path}"
);
}
}
#[test]

View file

@ -193,26 +193,48 @@ pub async fn handle_response_google_compat(response: Response) -> Result<Value,
}
}
pub fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
let is_reasoning_model = model_name.starts_with("o1")
|| model_name.starts_with("o2")
|| model_name.starts_with("o3")
|| model_name.starts_with("o4")
|| model_name.starts_with("gpt-5");
/// True when the model should use the OpenAI Responses API.
///
/// The Responses API is backwards-compatible with all OpenAI reasoning
/// models, so every `o`-series (`o1`, `o3`, `o4`, …) and `gpt-5` variant
/// routes here. The matcher intentionally scans the full model identifier so
/// hosted aliases like `databricks-gpt-5.4`, `goose-o3-mini`, or
/// `headless-goose-o3-mini` work without provider-specific normalization.
pub fn is_openai_responses_model(model_name: &str) -> bool {
static RE: OnceLock<Regex> = OnceLock::new();
let re =
RE.get_or_init(|| Regex::new(r"(?i)(?:^|[-/])(?:o\d+(?:$|-)|gpt-5(?:$|[-.]))").unwrap());
re.is_match(model_name)
}
if !is_reasoning_model {
/// Extract an explicit reasoning-effort suffix from a model name.
///
/// Returns `(base_model_name, Some(effort))` when the user appended a
/// recognised suffix like `-high` or `-xhigh`, e.g. `gpt-5.4-high` →
/// `("gpt-5.4", Some("high"))`.
///
/// When no suffix is present the effort is `None` — callers should omit
/// the `reasoning` field entirely so the API applies its own per-model
/// default. This avoids hard-coding a default that may be invalid for
/// certain models (e.g. `gpt-5-pro` only accepts `high`; older o-series
/// models reject `none` and `xhigh`).
pub fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
if !is_openai_responses_model(model_name) {
return (model_name.to_string(), None);
}
let parts: Vec<&str> = model_name.split('-').collect();
let last_part = parts.last().unwrap();
match *last_part {
"low" | "medium" | "high" => {
let base_name = parts[..parts.len() - 1].join("-");
(base_name, Some(last_part.to_string()))
}
_ => (model_name.to_string(), Some("medium".to_string())),
static RE: OnceLock<Regex> = OnceLock::new();
let re = RE.get_or_init(|| {
Regex::new(r"(?i)^(?P<base>.+)-(?P<effort>none|low|medium|high|xhigh)$").unwrap()
});
if let Some(captures) = re.captures(model_name) {
let base = captures["base"].to_string();
let effort = captures["effort"].to_ascii_lowercase();
return (base, Some(effort));
}
(model_name.to_string(), None)
}
pub fn sanitize_function_name(name: &str) -> String {
@ -870,4 +892,65 @@ mod tests {
Some(Duration::from_secs(42))
);
}
#[test]
fn test_is_openai_responses_model_matches_o_and_gpt5_families() {
for model in [
"o3",
"o3-mini",
"o4-mini",
"gpt-5",
"gpt-5-pro",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5-4",
"gpt-5-2-pro",
"databricks-gpt-5.4",
"goose-gpt-5.4-high",
"headless-goose-o3-mini",
] {
assert!(is_openai_responses_model(model), "{model} should match");
}
}
#[test]
fn test_is_openai_responses_model_rejects_other_families() {
for model in [
"gpt-4o",
"claude-sonnet-4",
"databricks-claude-sonnet-4",
"llama-3-70b",
] {
assert!(
!is_openai_responses_model(model),
"{model} should not match"
);
}
}
#[test]
fn test_extract_reasoning_effort_for_responses_models() {
for (model, expected_name, expected_effort) in [
("o3-none", "o3", Some("none")),
("o3-xhigh", "o3", Some("xhigh")),
("gpt-5-low", "gpt-5", Some("low")),
("gpt-5.4", "gpt-5.4", None),
(
"databricks-gpt-5.4-high",
"databricks-gpt-5.4",
Some("high"),
),
("databricks-o3-low", "databricks-o3", Some("low")),
("goose-gpt-5-high", "goose-gpt-5", Some("high")),
("gpt-4o", "gpt-4o", None),
] {
let (name, effort) = extract_reasoning_effort(model);
assert_eq!(name, expected_name, "unexpected base model for {model}");
assert_eq!(
effort.as_deref(),
expected_effort,
"unexpected effort for {model}"
);
}
}
}

View file

@ -422,10 +422,12 @@ pub async fn run_load_mode<C: Connection>() {
}
pub async fn run_load_model<C: Connection>() {
// Use a Chat Completions model so the canned SSE fixtures parse correctly.
// TODO: add a Responses API mock to OpenAiFixture for responses-routed models.
let expected_session_id = C::expected_session_id();
let openai = OpenAiFixture::new(
vec![(
r#""model":"o4-mini""#.into(),
r#""model":"gpt-4.1""#.into(),
include_str!("../acp_test_data/openai_basic.txt"),
)],
expected_session_id.clone(),
@ -437,7 +439,7 @@ pub async fn run_load_model<C: Connection>() {
expected_session_id.set(&session.session_id().0);
let session_id = session.session_id().0.to_string();
conn.set_model(&session_id, "o4-mini").await.unwrap();
conn.set_model(&session_id, "gpt-4.1").await.unwrap();
let output = session
.prompt("what is 1+1", PermissionDecision::Cancel)
@ -446,7 +448,7 @@ pub async fn run_load_model<C: Connection>() {
assert_eq!(output.text, "2");
let SessionData { models, .. } = conn.load_session(&session_id, vec![]).await.unwrap();
assert_eq!(&*models.unwrap().current_model_id.0, "o4-mini");
assert_eq!(&*models.unwrap().current_model_id.0, "gpt-4.1");
}
pub async fn run_load_session_mcp<C: Connection>() {
@ -773,12 +775,14 @@ enum SetModelVia {
}
async fn run_model_set_impl<C: Connection>(via: SetModelVia) {
// Use a Chat Completions model so the canned SSE fixtures parse correctly.
// TODO: add a Responses API mock to OpenAiFixture for responses-routed models.
let expected_session_id = C::expected_session_id();
let openai = OpenAiFixture::new(
vec![
// Session B prompt with switched model
(
r#""model":"o4-mini""#.into(),
r#""model":"gpt-4.1""#.into(),
include_str!("../acp_test_data/openai_basic.txt"),
),
// Session A prompt with default model
@ -803,23 +807,23 @@ async fn run_model_set_impl<C: Connection>(via: SetModelVia) {
..
} = conn.new_session().await.unwrap();
// Session B: switch to o4-mini
// Session B: switch to gpt-4.1
let SessionData {
session: mut session_b,
..
} = conn.new_session().await.unwrap();
let session_id = &session_b.session_id().0;
match via {
SetModelVia::Dedicated => conn.set_model(session_id, "o4-mini").await.unwrap(),
SetModelVia::Dedicated => conn.set_model(session_id, "gpt-4.1").await.unwrap(),
SetModelVia::ConfigOption => conn
.set_config_option(session_id, "model", "o4-mini")
.set_config_option(session_id, "model", "gpt-4.1")
.await
.unwrap(),
}
let set_model_notifs = session_b.notifications();
// Prompt B — expects o4-mini
// Prompt B — expects gpt-4.1
expected_session_id.set(&session_b.session_id().0);
let output = session_b
.prompt("what is 1+1", PermissionDecision::Cancel)
@ -1152,13 +1156,16 @@ pub async fn run_prompt_mcp<C: Connection>() {
}
pub async fn run_prompt_model_mismatch<C: Connection>() {
// Start the connection where the current model is not desired.
// Start the connection where the current model differs from TEST_MODEL.
// Use a Chat Completions model so the canned SSE fixtures parse correctly.
// TODO: add a Responses API mock to OpenAiFixture so we can test with
// responses-routed models like o4-mini here.
let config = TestConnectionConfig {
current_model: "o4-mini".to_string(),
current_model: "gpt-4.1".to_string(),
..Default::default()
};
// Server starts on o4-mini; client is configured with TEST_MODEL.
// Server starts on gpt-4.1; client is configured with TEST_MODEL.
// If session_model is seeded from the response, stream() detects the
// mismatch and sends set_model(TEST_MODEL) before prompting.
let BasicSession { conn: _, .. } = new_basic_session::<C>(config).await;

View file

@ -250,6 +250,9 @@ fn test_developer_fs_requests_use_acp_session_id() {
)
.await;
let config = TestConnectionConfig {
// gpt-5-nano routes to the Responses API; use a Chat Completions
// model so the canned SSE fixtures are parsed correctly.
current_model: "gpt-4.1".to_string(),
read_text_file: Some(Arc::new(move |req| {
*seen_session_id_clone.lock().unwrap() = Some(req.session_id.0.to_string());
Ok(sacp::schema::ReadTextFileResponse::new(

View file

@ -48,12 +48,13 @@ fn create_test_provider(mock_server_url: &str) -> Box<dyn Provider> {
async fn setup_mock_server() -> (MockServer, HeaderCapture, Box<dyn Provider>) {
let mock_server = MockServer::start().await;
let capture = HeaderCapture::new();
let capture_clone = capture.clone();
let chat_capture = capture.clone();
let responses_capture = capture.clone();
Mock::given(method("POST"))
.and(path("/v1/chat/completions"))
.respond_with(move |req: &Request| {
capture_clone.capture_session_header(req);
chat_capture.capture_session_header(req);
// Return SSE streaming format
let sse_response = format!(
"data: {}\n\ndata: {}\n\ndata: [DONE]\n\n",
@ -85,6 +86,57 @@ async fn setup_mock_server() -> (MockServer, HeaderCapture, Box<dyn Provider>) {
.mount(&mock_server)
.await;
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(move |req: &Request| {
responses_capture.capture_session_header(req);
let sse_response = format!(
"data: {}\n\ndata: {}\n\ndata: {}\n\ndata: [DONE]\n\n",
json!({
"type": "response.created",
"sequence_number": 1,
"response": {
"id": "resp_test",
"object": "response",
"created_at": 1755133833,
"status": "in_progress",
"model": "gpt-5-nano",
"output": []
}
}),
json!({
"type": "response.output_text.delta",
"sequence_number": 2,
"item_id": "msg_test",
"output_index": 0,
"content_index": 0,
"delta": "Hi there! How can I help you today?"
}),
json!({
"type": "response.completed",
"sequence_number": 3,
"response": {
"id": "resp_test",
"object": "response",
"created_at": 1755133833,
"status": "completed",
"model": "gpt-5-nano",
"output": [],
"usage": {
"input_tokens": 8,
"output_tokens": 10,
"total_tokens": 18
}
}
})
);
ResponseTemplate::new(200)
.set_body_string(sse_response)
.insert_header("content-type", "text/event-stream")
})
.mount(&mock_server)
.await;
let provider = create_test_provider(&mock_server.uri());
(mock_server, capture, provider)
}