mirror of
https://github.com/block/goose.git
synced 2026-04-26 10:40:45 +00:00
fix: add strict:false to Responses API tools and gpt-5.4 to known models (#8636)
Some checks are pending
Canary / Prepare Version (push) Waiting to run
Canary / build-cli (push) Blocked by required conditions
Canary / Upload Install Script (push) Blocked by required conditions
Canary / bundle-desktop (push) Blocked by required conditions
Canary / bundle-desktop-intel (push) Blocked by required conditions
Canary / bundle-desktop-linux (push) Blocked by required conditions
Canary / bundle-desktop-windows (push) Blocked by required conditions
Canary / Release (push) Blocked by required conditions
Cargo Deny / deny (push) Waiting to run
Unused Dependencies / machete (push) Waiting to run
CI / changes (push) Waiting to run
CI / Check Rust Code Format (push) Blocked by required conditions
CI / Build and Test Rust Project (push) Blocked by required conditions
CI / Build Rust Project on Windows (push) Waiting to run
CI / Check MSRV (push) Blocked by required conditions
CI / Lint Rust Code (push) Blocked by required conditions
CI / Check Generated Schemas are Up-to-Date (push) Blocked by required conditions
CI / Test and Lint Electron Desktop App (push) Blocked by required conditions
Deploy Documentation / deploy (push) Waiting to run
Goose 2 CI / Lint & Format (push) Waiting to run
Goose 2 CI / Unit Tests (push) Waiting to run
Goose 2 CI / Desktop Build & E2E (push) Waiting to run
Goose 2 CI / Rust Lint (push) Waiting to run
Live Provider Tests / Build Binary (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (Code Execution) (push) Blocked by required conditions
Live Provider Tests / Compaction Tests (push) Blocked by required conditions
Live Provider Tests / check-fork (push) Waiting to run
Live Provider Tests / changes (push) Blocked by required conditions
Live Provider Tests / goose server HTTP integration tests (push) Blocked by required conditions
Publish Ask AI Bot Docker Image / docker (push) Waiting to run
Publish Docker Image / docker (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
Some checks are pending
Canary / Prepare Version (push) Waiting to run
Canary / build-cli (push) Blocked by required conditions
Canary / Upload Install Script (push) Blocked by required conditions
Canary / bundle-desktop (push) Blocked by required conditions
Canary / bundle-desktop-intel (push) Blocked by required conditions
Canary / bundle-desktop-linux (push) Blocked by required conditions
Canary / bundle-desktop-windows (push) Blocked by required conditions
Canary / Release (push) Blocked by required conditions
Cargo Deny / deny (push) Waiting to run
Unused Dependencies / machete (push) Waiting to run
CI / changes (push) Waiting to run
CI / Check Rust Code Format (push) Blocked by required conditions
CI / Build and Test Rust Project (push) Blocked by required conditions
CI / Build Rust Project on Windows (push) Waiting to run
CI / Check MSRV (push) Blocked by required conditions
CI / Lint Rust Code (push) Blocked by required conditions
CI / Check Generated Schemas are Up-to-Date (push) Blocked by required conditions
CI / Test and Lint Electron Desktop App (push) Blocked by required conditions
Deploy Documentation / deploy (push) Waiting to run
Goose 2 CI / Lint & Format (push) Waiting to run
Goose 2 CI / Unit Tests (push) Waiting to run
Goose 2 CI / Desktop Build & E2E (push) Waiting to run
Goose 2 CI / Rust Lint (push) Waiting to run
Live Provider Tests / Build Binary (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (Code Execution) (push) Blocked by required conditions
Live Provider Tests / Compaction Tests (push) Blocked by required conditions
Live Provider Tests / check-fork (push) Waiting to run
Live Provider Tests / changes (push) Blocked by required conditions
Live Provider Tests / goose server HTTP integration tests (push) Blocked by required conditions
Publish Ask AI Bot Docker Image / docker (push) Waiting to run
Publish Docker Image / docker (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
Signed-off-by: Bradley Axen <baxen@squareup.com>
This commit is contained in:
parent
7325fbdae3
commit
ab0c40d26a
11 changed files with 474 additions and 130 deletions
|
|
@ -1,7 +1,12 @@
|
|||
use std::sync::{Arc, Mutex};
|
||||
|
||||
/// Session id value used by the test fixtures.
pub const TEST_SESSION_ID: &str = "test-session-id";

// Use a Chat Completions model so the canned SSE fixtures (which return
// Chat Completions format) are parsed correctly. gpt-5-nano now routes to
// the Responses API which needs a different mock format.
// TODO: add a Responses API mock to OpenAiFixture so tests can cover
// responses-routed models like gpt-5-nano end-to-end.
/// Default model name used by the tests. Declared exactly once: the earlier
/// `"gpt-5-nano"` declaration is superseded by this one.
pub const TEST_MODEL: &str = "gpt-4.1";

/// Placeholder session id string; its use sites are outside this view.
const NOT_YET_SET: &str = "session-id-not-yet-set";
/// Key under which the session id travels; presumably an HTTP header name —
/// confirm against the code that reads it.
pub(crate) const SESSION_ID_HEADER: &str = "agent-session-id";
|
||||
|
|
|
|||
|
|
@ -138,9 +138,22 @@ impl ModelConfig {
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(canonical) =
|
||||
// Try canonical lookup with the full model name first, then fall back
|
||||
// to the name with reasoning-effort suffixes stripped (e.g.
|
||||
// "databricks-gpt-5.4-high" → "databricks-gpt-5.4").
|
||||
let canonical =
|
||||
crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
|
||||
{
|
||||
.or_else(|| {
|
||||
let (base, _effort) =
|
||||
crate::providers::utils::extract_reasoning_effort(&self.model_name);
|
||||
if base != self.model_name {
|
||||
crate::providers::canonical::maybe_get_canonical_model(provider_name, &base)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
if let Some(canonical) = canonical {
|
||||
if self.context_limit.is_none() {
|
||||
self.context_limit = Some(canonical.limit.context);
|
||||
}
|
||||
|
|
@ -299,15 +312,7 @@ impl ModelConfig {
|
|||
}
|
||||
|
||||
pub fn is_openai_reasoning_model(&self) -> bool {
|
||||
const DATABRICKS_MODEL_NAME_PREFIXES: &[&str] = &["goose-", "databricks-"];
|
||||
const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
|
||||
|
||||
let base = DATABRICKS_MODEL_NAME_PREFIXES
|
||||
.iter()
|
||||
.find_map(|p| self.model_name.strip_prefix(p))
|
||||
.unwrap_or(&self.model_name);
|
||||
|
||||
REASONING_PREFIXES.iter().any(|p| base.starts_with(p))
|
||||
crate::providers::utils::is_openai_responses_model(&self.model_name)
|
||||
}
|
||||
|
||||
pub fn max_output_tokens(&self) -> i32 {
|
||||
|
|
@ -499,6 +504,28 @@ mod tests {
|
|||
assert_eq!(config.max_tokens, None);
|
||||
assert_eq!(config.reasoning, None);
|
||||
}
|
||||
|
||||
#[test]
fn resolves_after_stripping_reasoning_effort_suffix() {
    // Clear the env overrides so only the canonical lookup decides the limit.
    let _guard = env_lock::lock_env([
        ("GOOSE_MAX_TOKENS", None::<&str>),
        ("GOOSE_CONTEXT_LIMIT", None::<&str>),
    ]);

    // (model name with effort suffix, provider, expected context limit).
    // Each entry should resolve through its base name once the suffix is
    // stripped, e.g. "databricks-gpt-5.4-high" via "databricks-gpt-5.4".
    let cases = [
        ("databricks-gpt-5.4-high", "databricks", 1_050_000),
        ("gpt-5.4-xhigh", "openai", 1_050_000),
        ("gpt-5.4-nano-low", "openai", 400_000),
    ];

    for (model, provider, expected_limit) in cases {
        let resolved = ModelConfig::new_or_fail(model).with_canonical_limits(provider);
        assert_eq!(resolved.context_limit, Some(expected_limit));
    }
}
|
||||
}
|
||||
|
||||
mod is_openai_reasoning_model {
|
||||
|
|
|
|||
|
|
@ -269,17 +269,19 @@ impl DatabricksProvider {
|
|||
}
|
||||
|
||||
fn is_responses_model(model_name: &str) -> bool {
|
||||
let normalized = model_name.to_ascii_lowercase();
|
||||
normalized.contains("codex")
|
||||
super::utils::is_openai_responses_model(model_name)
|
||||
}
|
||||
|
||||
fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
|
||||
if is_embedding {
|
||||
"serving-endpoints/text-embedding-3-small/invocations".to_string()
|
||||
} else if Self::is_responses_model(model_name) {
|
||||
"serving-endpoints/responses".to_string()
|
||||
} else {
|
||||
format!("serving-endpoints/{}/invocations", model_name)
|
||||
let (clean_name, _) = super::utils::extract_reasoning_effort(model_name);
|
||||
if Self::is_responses_model(&clean_name) {
|
||||
"serving-endpoints/responses".to_string()
|
||||
} else {
|
||||
format!("serving-endpoints/{}/invocations", clean_name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -594,3 +596,48 @@ impl EmbeddingCapable for DatabricksProvider {
|
|||
Ok(embeddings)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `DatabricksProvider` aimed at `https://example.com` with a
    /// placeholder token, for tests that only inspect endpoint routing.
    fn test_provider() -> DatabricksProvider {
        let api_client = super::super::api_client::ApiClient::new(
            "https://example.com".to_string(),
            super::super::api_client::AuthMethod::NoAuth,
        )
        .unwrap();

        DatabricksProvider {
            api_client,
            auth: DatabricksAuth::Token("fake".into()),
            model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
            image_format: ImageFormat::OpenAi,
            retry_config: RetryConfig::default(),
            fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
            name: "databricks".into(),
            token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
            instance_id: None,
        }
    }

    /// Responses-capable model names (with or without a hosted prefix or an
    /// effort suffix) must hit the shared responses endpoint; everything else
    /// keeps its per-model invocations path.
    #[test]
    fn responses_models_route_to_responses_endpoint() {
        let provider = test_provider();

        let cases = [
            ("gpt-5.4", "serving-endpoints/responses"),
            ("databricks-gpt-5.4-high", "serving-endpoints/responses"),
            ("databricks-gpt-5-4-xhigh", "serving-endpoints/responses"),
            ("o3-mini", "serving-endpoints/responses"),
            (
                "databricks-claude-sonnet-4",
                "serving-endpoints/databricks-claude-sonnet-4/invocations",
            ),
        ];

        for (model_name, expected_path) in cases {
            let actual_path = provider.get_endpoint_path(model_name, false);
            assert_eq!(
                actual_path, expected_path,
                "unexpected endpoint for {model_name}"
            );
        }
    }
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@ use crate::conversation::message::{Message, MessageContent};
|
|||
use crate::model::ModelConfig;
|
||||
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
|
||||
use crate::providers::utils::{
|
||||
convert_image, detect_image_path, is_valid_function_name, load_image_file, safely_parse_json,
|
||||
sanitize_function_name, ImageFormat,
|
||||
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
|
||||
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
|
||||
ImageFormat,
|
||||
};
|
||||
use anyhow::{anyhow, Error};
|
||||
use rmcp::model::{
|
||||
|
|
@ -581,24 +582,8 @@ pub fn create_request(
|
|||
));
|
||||
}
|
||||
|
||||
let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
|
||||
let (model_name, reasoning_effort) = if is_openai_reasoning_model {
|
||||
let parts: Vec<&str> = model_config.model_name.split('-').collect();
|
||||
let last_part = parts.last().unwrap();
|
||||
|
||||
match *last_part {
|
||||
"low" | "medium" | "high" => {
|
||||
let base_name = parts[..parts.len() - 1].join("-");
|
||||
(base_name, Some(last_part.to_string()))
|
||||
}
|
||||
_ => (
|
||||
model_config.model_name.to_string(),
|
||||
Some("medium".to_string()),
|
||||
),
|
||||
}
|
||||
} else {
|
||||
(model_config.model_name.to_string(), None)
|
||||
};
|
||||
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let is_openai_reasoning_model = is_openai_responses_model(&model_name);
|
||||
|
||||
let system_message = DatabricksMessage {
|
||||
role: "system".to_string(),
|
||||
|
|
@ -1073,6 +1058,63 @@ mod tests {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// An `-xhigh` suffix is split off the model name and sent as the effort.
#[test]
fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
    let config = ModelConfig {
        model_name: String::from("o3-xhigh"),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: None,
        reasoning: None,
    };

    let payload = create_request(&config, "system", &[], &[], &ImageFormat::OpenAi)?;

    assert_eq!(payload["model"], "o3");
    assert_eq!(payload["reasoning_effort"], "xhigh");
    Ok(())
}
|
||||
|
||||
/// A `-none` suffix is split off the model name and sent as the effort.
#[test]
fn test_create_request_reasoning_effort_none() -> anyhow::Result<()> {
    let config = ModelConfig {
        model_name: String::from("o3-none"),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: None,
        reasoning: None,
    };

    let payload = create_request(&config, "system", &[], &[], &ImageFormat::OpenAi)?;

    assert_eq!(payload["model"], "o3");
    assert_eq!(payload["reasoning_effort"], "none");
    Ok(())
}
|
||||
|
||||
/// A hosted-prefix model name keeps its prefix while the `-high` suffix is
/// moved into `reasoning_effort`.
#[test]
fn test_create_request_reasoning_effort_for_prefixed_gpt5_model() -> anyhow::Result<()> {
    let config = ModelConfig {
        model_name: String::from("databricks-gpt-5.4-high"),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: None,
        reasoning: None,
    };

    let payload = create_request(&config, "system", &[], &[], &ImageFormat::OpenAi)?;

    assert_eq!(payload["model"], "databricks-gpt-5.4");
    assert_eq!(payload["reasoning_effort"], "high");
    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
|
||||
let _guard = env_lock::lock_env([
|
||||
|
|
|
|||
|
|
@ -4,8 +4,9 @@ use crate::model::ModelConfig;
|
|||
use crate::providers::base::{ProviderUsage, Usage};
|
||||
use crate::providers::errors::ProviderError;
|
||||
use crate::providers::utils::{
|
||||
convert_image, detect_image_path, extract_reasoning_effort, is_valid_function_name,
|
||||
load_image_file, safely_parse_json, sanitize_function_name, ImageFormat,
|
||||
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
|
||||
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
|
||||
ImageFormat,
|
||||
};
|
||||
use anyhow::{anyhow, Error};
|
||||
use async_stream::try_stream;
|
||||
|
|
@ -984,7 +985,7 @@ pub fn create_request(
|
|||
}
|
||||
|
||||
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let is_reasoning_model = reasoning_effort.is_some();
|
||||
let is_reasoning_model = is_openai_responses_model(&model_name);
|
||||
|
||||
let system_message = json!({
|
||||
"role": if is_reasoning_model { "developer" } else { "system" },
|
||||
|
|
@ -1716,7 +1717,8 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_create_request_o1_default() -> anyhow::Result<()> {
|
||||
// Test default medium reasoning effort for O1 model
|
||||
// Without an explicit effort suffix the API picks its own default;
|
||||
// we should omit reasoning_effort entirely but still use "developer" role.
|
||||
let model_config = ModelConfig {
|
||||
model_name: "o1".to_string(),
|
||||
context_limit: Some(4096),
|
||||
|
|
@ -1745,13 +1747,16 @@ mod tests {
|
|||
"content": "system"
|
||||
}
|
||||
],
|
||||
"reasoning_effort": "medium",
|
||||
"max_completion_tokens": 1024
|
||||
});
|
||||
|
||||
for (key, value) in expected.as_object().unwrap() {
|
||||
assert_eq!(obj.get(key).unwrap(), value);
|
||||
}
|
||||
assert!(
|
||||
obj.get("reasoning_effort").is_none(),
|
||||
"reasoning_effort should be omitted when no explicit suffix is provided"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use crate::conversation::message::{Message, MessageContent};
|
|||
use crate::mcp_utils::extract_text_from_resource;
|
||||
use crate::model::ModelConfig;
|
||||
use crate::providers::base::{ProviderUsage, Usage};
|
||||
use crate::providers::utils::extract_reasoning_effort;
|
||||
use crate::providers::utils::{extract_reasoning_effort, is_openai_responses_model};
|
||||
use anyhow::{anyhow, Error};
|
||||
use async_stream::try_stream;
|
||||
use chrono;
|
||||
|
|
@ -468,7 +468,10 @@ pub fn create_responses_request(
|
|||
add_message_items(&mut input_items, messages);
|
||||
|
||||
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let is_reasoning_model = reasoning_effort.is_some();
|
||||
// All models routed here are responses-capable; temperature is rejected
|
||||
// by the API for reasoning models regardless of whether an explicit
|
||||
// effort suffix was provided.
|
||||
let is_reasoning_model = is_openai_responses_model(&model_name);
|
||||
|
||||
let mut payload = json!({
|
||||
"model": model_name,
|
||||
|
|
@ -495,6 +498,7 @@ pub fn create_responses_request(
|
|||
"name": tool.name,
|
||||
"description": tool.description,
|
||||
"parameters": tool.input_schema,
|
||||
"strict": false,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
|
@ -1083,4 +1087,103 @@ mod tests {
|
|||
assert_eq!(info.effort.as_deref(), Some("high"));
|
||||
assert_eq!(info.summary.as_deref(), Some("Thought deeply"));
|
||||
}
|
||||
|
||||
/// Every tool serialized into a Responses request must carry an explicit
/// `"strict": false`.
#[test]
fn test_responses_tools_include_strict_false() {
    let config = ModelConfig {
        model_name: String::from("gpt-5.4"),
        context_limit: None,
        temperature: None,
        max_tokens: None,
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: None,
        reasoning: None,
    };

    let shell_schema = object!({
        "type": "object",
        "properties": {
            "command": {
                "type": "string",
                "description": "The command to run"
            }
        },
        "required": ["command"]
    });
    let shell_tool = Tool::new("shell", "Execute a shell command", shell_schema);

    let request =
        create_responses_request(&config, "You are helpful.", &[], &[shell_tool]).unwrap();

    let serialized_tools = request["tools"]
        .as_array()
        .expect("tools should be an array");
    assert_eq!(serialized_tools.len(), 1);
    assert_eq!(
        serialized_tools[0]["strict"],
        json!(false),
        "Responses API defaults strict to true, but MCP tool schemas are not strict-compatible; must explicitly set strict: false"
    );
}
|
||||
|
||||
/// An explicit effort suffix is removed from `model` and surfaces as
/// `reasoning.effort`, alongside `reasoning.summary == "auto"`.
#[test]
fn test_responses_request_with_explicit_effort_suffix() {
    // (configured model name, model sent to the API, effort sent to the API)
    let cases = [
        ("gpt-5.4-xhigh", "gpt-5.4", "xhigh"),
        ("databricks-gpt-5.4-high", "databricks-gpt-5.4", "high"),
        ("databricks-o3-none", "databricks-o3", "none"),
    ];

    for (model_name, expected_model, expected_effort) in cases {
        let config = ModelConfig {
            model_name: String::from(model_name),
            context_limit: None,
            temperature: None,
            max_tokens: None,
            toolshim: false,
            toolshim_model: None,
            fast_model_config: None,
            request_params: None,
            reasoning: None,
        };

        let request = create_responses_request(&config, "You are helpful.", &[], &[]).unwrap();

        assert_eq!(
            request["model"], expected_model,
            "unexpected model for {model_name}"
        );
        assert_eq!(
            request["reasoning"]["effort"], expected_effort,
            "unexpected effort for {model_name}"
        );
        assert_eq!(request["reasoning"]["summary"], "auto");
    }
}
|
||||
|
||||
/// Without an effort suffix the model name is passed through untouched and
/// no `reasoning` key is emitted.
#[test]
fn test_responses_request_without_effort_suffix_omits_reasoning() {
    let suffixless_models = ["gpt-5.4", "o3", "gpt-5-nano"];

    for model_name in suffixless_models {
        let config = ModelConfig {
            model_name: String::from(model_name),
            context_limit: None,
            temperature: None,
            max_tokens: None,
            toolshim: false,
            toolshim_model: None,
            fast_model_config: None,
            request_params: None,
            reasoning: None,
        };

        let request = create_responses_request(&config, "You are helpful.", &[], &[]).unwrap();

        assert_eq!(request["model"], model_name, "model should be unchanged");
        assert!(
            request.get("reasoning").is_none(),
            "reasoning should be omitted for {model_name} without explicit effort suffix"
        );
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,9 +48,21 @@ pub const OPEN_AI_KNOWN_MODELS: &[(&str, usize)] = &[
|
|||
("gpt-3.5-turbo", 16_385),
|
||||
("gpt-4-turbo", 128_000),
|
||||
("o4-mini", 128_000),
|
||||
("gpt-5", 400_000),
|
||||
("gpt-5-mini", 400_000),
|
||||
("gpt-5-nano", 400_000),
|
||||
("gpt-5.1-codex", 400_000),
|
||||
("gpt-5-pro", 400_000),
|
||||
("gpt-5-codex", 400_000),
|
||||
("gpt-5.1", 400_000),
|
||||
("gpt-5.1-codex", 400_000),
|
||||
("gpt-5.2", 400_000),
|
||||
("gpt-5.2-codex", 400_000),
|
||||
("gpt-5.2-pro", 400_000),
|
||||
("gpt-5.3-codex", 400_000),
|
||||
("gpt-5.4", 1_050_000),
|
||||
("gpt-5.4-mini", 400_000),
|
||||
("gpt-5.4-nano", 400_000),
|
||||
("gpt-5.4-pro", 1_050_000),
|
||||
];
|
||||
|
||||
pub const OPEN_AI_DOC_URL: &str = "https://platform.openai.com/docs/models";
|
||||
|
|
@ -283,10 +295,7 @@ impl OpenAiProvider {
|
|||
}
|
||||
|
||||
fn is_responses_model(model_name: &str) -> bool {
|
||||
let normalized_model = model_name.to_ascii_lowercase();
|
||||
(normalized_model.starts_with("gpt-5") && normalized_model.contains("codex"))
|
||||
|| normalized_model.starts_with("gpt-5.2-pro")
|
||||
|| normalized_model.starts_with("gpt-5.4")
|
||||
super::utils::is_openai_responses_model(model_name)
|
||||
}
|
||||
|
||||
fn should_use_responses_api(model_name: &str, base_path: &str) -> bool {
|
||||
|
|
@ -834,59 +843,20 @@ mod tests {
|
|||
}
|
||||
|
||||
/// Routing is decided by model family when the base path is the default
/// `v1/chat/completions`, and an explicitly customised chat path forces Chat
/// Completions even for a responses-capable model.
///
/// The table also keeps the cases that used to live in the individual
/// per-model tests (`gpt-5.2-codex`, `gpt-5.2-pro`, dated `gpt-5.4`), so no
/// coverage is lost by the consolidation.
#[test]
fn responses_api_routing_uses_model_family_unless_path_forces_chat() {
    for (model_name, base_path, expected) in [
        ("gpt-5.4", "v1/chat/completions", true),
        ("gpt-5.4-xhigh", "v1/chat/completions", true),
        ("gpt-5.4-2026-03-01", "v1/chat/completions", true),
        ("gpt-5.2-codex", "v1/chat/completions", true),
        ("gpt-5.2-pro", "v1/chat/completions", true),
        ("gpt-5.2-pro-2025-12-11", "v1/chat/completions", true),
        ("gpt-4o", "v1/chat/completions", false),
        ("gpt-5.2-codex", "openai/v1/chat/completions", false),
    ] {
        assert_eq!(
            OpenAiProvider::should_use_responses_api(model_name, base_path),
            expected,
            "unexpected routing for {model_name} via {base_path}"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -193,26 +193,48 @@ pub async fn handle_response_google_compat(response: Response) -> Result<Value,
|
|||
}
|
||||
}
|
||||
|
||||
pub fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
|
||||
let is_reasoning_model = model_name.starts_with("o1")
|
||||
|| model_name.starts_with("o2")
|
||||
|| model_name.starts_with("o3")
|
||||
|| model_name.starts_with("o4")
|
||||
|| model_name.starts_with("gpt-5");
|
||||
/// True when the model should use the OpenAI Responses API.
|
||||
///
|
||||
/// The Responses API is backwards-compatible with all OpenAI reasoning
|
||||
/// models, so every `o`-series (`o1`, `o3`, `o4`, …) and `gpt-5` variant
|
||||
/// routes here. The matcher intentionally scans the full model identifier so
|
||||
/// hosted aliases like `databricks-gpt-5.4`, `goose-o3-mini`, or
|
||||
/// `headless-goose-o3-mini` work without provider-specific normalization.
|
||||
pub fn is_openai_responses_model(model_name: &str) -> bool {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let re =
|
||||
RE.get_or_init(|| Regex::new(r"(?i)(?:^|[-/])(?:o\d+(?:$|-)|gpt-5(?:$|[-.]))").unwrap());
|
||||
re.is_match(model_name)
|
||||
}
|
||||
|
||||
if !is_reasoning_model {
|
||||
/// Extract an explicit reasoning-effort suffix from a model name.
|
||||
///
|
||||
/// Returns `(base_model_name, Some(effort))` when the user appended a
|
||||
/// recognised suffix like `-high` or `-xhigh`, e.g. `gpt-5.4-high` →
|
||||
/// `("gpt-5.4", Some("high"))`.
|
||||
///
|
||||
/// When no suffix is present the effort is `None` — callers should omit
|
||||
/// the `reasoning` field entirely so the API applies its own per-model
|
||||
/// default. This avoids hard-coding a default that may be invalid for
|
||||
/// certain models (e.g. `gpt-5-pro` only accepts `high`; older o-series
|
||||
/// models reject `none` and `xhigh`).
|
||||
pub fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
|
||||
if !is_openai_responses_model(model_name) {
|
||||
return (model_name.to_string(), None);
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = model_name.split('-').collect();
|
||||
let last_part = parts.last().unwrap();
|
||||
match *last_part {
|
||||
"low" | "medium" | "high" => {
|
||||
let base_name = parts[..parts.len() - 1].join("-");
|
||||
(base_name, Some(last_part.to_string()))
|
||||
}
|
||||
_ => (model_name.to_string(), Some("medium".to_string())),
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let re = RE.get_or_init(|| {
|
||||
Regex::new(r"(?i)^(?P<base>.+)-(?P<effort>none|low|medium|high|xhigh)$").unwrap()
|
||||
});
|
||||
|
||||
if let Some(captures) = re.captures(model_name) {
|
||||
let base = captures["base"].to_string();
|
||||
let effort = captures["effort"].to_ascii_lowercase();
|
||||
return (base, Some(effort));
|
||||
}
|
||||
|
||||
(model_name.to_string(), None)
|
||||
}
|
||||
|
||||
pub fn sanitize_function_name(name: &str) -> String {
|
||||
|
|
@ -870,4 +892,65 @@ mod tests {
|
|||
Some(Duration::from_secs(42))
|
||||
);
|
||||
}
|
||||
|
||||
/// Bare, dotted, dashed and hosted-prefix spellings of the `o`-series and
/// `gpt-5` families must all be recognised.
#[test]
fn test_is_openai_responses_model_matches_o_and_gpt5_families() {
    const RESPONSES_MODELS: [&str; 12] = [
        "o3",
        "o3-mini",
        "o4-mini",
        "gpt-5",
        "gpt-5-pro",
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5-4",
        "gpt-5-2-pro",
        "databricks-gpt-5.4",
        "goose-gpt-5.4-high",
        "headless-goose-o3-mini",
    ];

    for model in RESPONSES_MODELS {
        let matched = is_openai_responses_model(model);
        assert!(matched, "{model} should match");
    }
}
|
||||
|
||||
/// Models outside the `o`-series / `gpt-5` families must not be recognised,
/// with or without a hosted prefix.
#[test]
fn test_is_openai_responses_model_rejects_other_families() {
    const OTHER_MODELS: [&str; 4] = [
        "gpt-4o",
        "claude-sonnet-4",
        "databricks-claude-sonnet-4",
        "llama-3-70b",
    ];

    for model in OTHER_MODELS {
        let matched = is_openai_responses_model(model);
        assert!(!matched, "{model} should not match");
    }
}
|
||||
|
||||
/// Effort suffixes are split off responses-capable names; names without a
/// suffix, and non-responses names, come back unchanged with no effort.
#[test]
fn test_extract_reasoning_effort_for_responses_models() {
    // (input name, expected base name, expected effort)
    let cases: [(&str, &str, Option<&str>); 8] = [
        ("o3-none", "o3", Some("none")),
        ("o3-xhigh", "o3", Some("xhigh")),
        ("gpt-5-low", "gpt-5", Some("low")),
        ("gpt-5.4", "gpt-5.4", None),
        ("databricks-gpt-5.4-high", "databricks-gpt-5.4", Some("high")),
        ("databricks-o3-low", "databricks-o3", Some("low")),
        ("goose-gpt-5-high", "goose-gpt-5", Some("high")),
        ("gpt-4o", "gpt-4o", None),
    ];

    for (model, expected_name, expected_effort) in cases {
        let (name, effort) = extract_reasoning_effort(model);
        assert_eq!(name, expected_name, "unexpected base model for {model}");
        assert_eq!(
            effort.as_deref(),
            expected_effort,
            "unexpected effort for {model}"
        );
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -422,10 +422,12 @@ pub async fn run_load_mode<C: Connection>() {
|
|||
}
|
||||
|
||||
pub async fn run_load_model<C: Connection>() {
|
||||
// Use a Chat Completions model so the canned SSE fixtures parse correctly.
|
||||
// TODO: add a Responses API mock to OpenAiFixture for responses-routed models.
|
||||
let expected_session_id = C::expected_session_id();
|
||||
let openai = OpenAiFixture::new(
|
||||
vec![(
|
||||
r#""model":"o4-mini""#.into(),
|
||||
r#""model":"gpt-4.1""#.into(),
|
||||
include_str!("../acp_test_data/openai_basic.txt"),
|
||||
)],
|
||||
expected_session_id.clone(),
|
||||
|
|
@ -437,7 +439,7 @@ pub async fn run_load_model<C: Connection>() {
|
|||
expected_session_id.set(&session.session_id().0);
|
||||
|
||||
let session_id = session.session_id().0.to_string();
|
||||
conn.set_model(&session_id, "o4-mini").await.unwrap();
|
||||
conn.set_model(&session_id, "gpt-4.1").await.unwrap();
|
||||
|
||||
let output = session
|
||||
.prompt("what is 1+1", PermissionDecision::Cancel)
|
||||
|
|
@ -446,7 +448,7 @@ pub async fn run_load_model<C: Connection>() {
|
|||
assert_eq!(output.text, "2");
|
||||
|
||||
let SessionData { models, .. } = conn.load_session(&session_id, vec![]).await.unwrap();
|
||||
assert_eq!(&*models.unwrap().current_model_id.0, "o4-mini");
|
||||
assert_eq!(&*models.unwrap().current_model_id.0, "gpt-4.1");
|
||||
}
|
||||
|
||||
pub async fn run_load_session_mcp<C: Connection>() {
|
||||
|
|
@ -773,12 +775,14 @@ enum SetModelVia {
|
|||
}
|
||||
|
||||
async fn run_model_set_impl<C: Connection>(via: SetModelVia) {
|
||||
// Use a Chat Completions model so the canned SSE fixtures parse correctly.
|
||||
// TODO: add a Responses API mock to OpenAiFixture for responses-routed models.
|
||||
let expected_session_id = C::expected_session_id();
|
||||
let openai = OpenAiFixture::new(
|
||||
vec![
|
||||
// Session B prompt with switched model
|
||||
(
|
||||
r#""model":"o4-mini""#.into(),
|
||||
r#""model":"gpt-4.1""#.into(),
|
||||
include_str!("../acp_test_data/openai_basic.txt"),
|
||||
),
|
||||
// Session A prompt with default model
|
||||
|
|
@ -803,23 +807,23 @@ async fn run_model_set_impl<C: Connection>(via: SetModelVia) {
|
|||
..
|
||||
} = conn.new_session().await.unwrap();
|
||||
|
||||
// Session B: switch to o4-mini
|
||||
// Session B: switch to gpt-4.1
|
||||
let SessionData {
|
||||
session: mut session_b,
|
||||
..
|
||||
} = conn.new_session().await.unwrap();
|
||||
let session_id = &session_b.session_id().0;
|
||||
match via {
|
||||
SetModelVia::Dedicated => conn.set_model(session_id, "o4-mini").await.unwrap(),
|
||||
SetModelVia::Dedicated => conn.set_model(session_id, "gpt-4.1").await.unwrap(),
|
||||
SetModelVia::ConfigOption => conn
|
||||
.set_config_option(session_id, "model", "o4-mini")
|
||||
.set_config_option(session_id, "model", "gpt-4.1")
|
||||
.await
|
||||
.unwrap(),
|
||||
}
|
||||
|
||||
let set_model_notifs = session_b.notifications();
|
||||
|
||||
// Prompt B — expects o4-mini
|
||||
// Prompt B — expects gpt-4.1
|
||||
expected_session_id.set(&session_b.session_id().0);
|
||||
let output = session_b
|
||||
.prompt("what is 1+1", PermissionDecision::Cancel)
|
||||
|
|
@ -1152,13 +1156,16 @@ pub async fn run_prompt_mcp<C: Connection>() {
|
|||
}
|
||||
|
||||
pub async fn run_prompt_model_mismatch<C: Connection>() {
|
||||
// Start the connection where the current model is not desired.
|
||||
// Start the connection where the current model differs from TEST_MODEL.
|
||||
// Use a Chat Completions model so the canned SSE fixtures parse correctly.
|
||||
// TODO: add a Responses API mock to OpenAiFixture so we can test with
|
||||
// responses-routed models like o4-mini here.
|
||||
let config = TestConnectionConfig {
|
||||
current_model: "o4-mini".to_string(),
|
||||
current_model: "gpt-4.1".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Server starts on o4-mini; client is configured with TEST_MODEL.
|
||||
// Server starts on gpt-4.1; client is configured with TEST_MODEL.
|
||||
// If session_model is seeded from the response, stream() detects the
|
||||
// mismatch and sends set_model(TEST_MODEL) before prompting.
|
||||
let BasicSession { conn: _, .. } = new_basic_session::<C>(config).await;
|
||||
|
|
|
|||
|
|
@ -250,6 +250,9 @@ fn test_developer_fs_requests_use_acp_session_id() {
|
|||
)
|
||||
.await;
|
||||
let config = TestConnectionConfig {
|
||||
// gpt-5-nano routes to the Responses API; use a Chat Completions
|
||||
// model so the canned SSE fixtures are parsed correctly.
|
||||
current_model: "gpt-4.1".to_string(),
|
||||
read_text_file: Some(Arc::new(move |req| {
|
||||
*seen_session_id_clone.lock().unwrap() = Some(req.session_id.0.to_string());
|
||||
Ok(sacp::schema::ReadTextFileResponse::new(
|
||||
|
|
|
|||
|
|
@ -48,12 +48,13 @@ fn create_test_provider(mock_server_url: &str) -> Box<dyn Provider> {
|
|||
async fn setup_mock_server() -> (MockServer, HeaderCapture, Box<dyn Provider>) {
|
||||
let mock_server = MockServer::start().await;
|
||||
let capture = HeaderCapture::new();
|
||||
let capture_clone = capture.clone();
|
||||
let chat_capture = capture.clone();
|
||||
let responses_capture = capture.clone();
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/chat/completions"))
|
||||
.respond_with(move |req: &Request| {
|
||||
capture_clone.capture_session_header(req);
|
||||
chat_capture.capture_session_header(req);
|
||||
// Return SSE streaming format
|
||||
let sse_response = format!(
|
||||
"data: {}\n\ndata: {}\n\ndata: [DONE]\n\n",
|
||||
|
|
@ -85,6 +86,57 @@ async fn setup_mock_server() -> (MockServer, HeaderCapture, Box<dyn Provider>) {
|
|||
.mount(&mock_server)
|
||||
.await;
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(move |req: &Request| {
|
||||
responses_capture.capture_session_header(req);
|
||||
let sse_response = format!(
|
||||
"data: {}\n\ndata: {}\n\ndata: {}\n\ndata: [DONE]\n\n",
|
||||
json!({
|
||||
"type": "response.created",
|
||||
"sequence_number": 1,
|
||||
"response": {
|
||||
"id": "resp_test",
|
||||
"object": "response",
|
||||
"created_at": 1755133833,
|
||||
"status": "in_progress",
|
||||
"model": "gpt-5-nano",
|
||||
"output": []
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.output_text.delta",
|
||||
"sequence_number": 2,
|
||||
"item_id": "msg_test",
|
||||
"output_index": 0,
|
||||
"content_index": 0,
|
||||
"delta": "Hi there! How can I help you today?"
|
||||
}),
|
||||
json!({
|
||||
"type": "response.completed",
|
||||
"sequence_number": 3,
|
||||
"response": {
|
||||
"id": "resp_test",
|
||||
"object": "response",
|
||||
"created_at": 1755133833,
|
||||
"status": "completed",
|
||||
"model": "gpt-5-nano",
|
||||
"output": [],
|
||||
"usage": {
|
||||
"input_tokens": 8,
|
||||
"output_tokens": 10,
|
||||
"total_tokens": 18
|
||||
}
|
||||
}
|
||||
})
|
||||
);
|
||||
ResponseTemplate::new(200)
|
||||
.set_body_string(sse_response)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
|
||||
let provider = create_test_provider(&mock_server.uri());
|
||||
(mock_server, capture, provider)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue