diff --git a/docs/release-control/v6/internal/subsystems/ai-runtime.md b/docs/release-control/v6/internal/subsystems/ai-runtime.md index 11223231e..acdc04306 100644 --- a/docs/release-control/v6/internal/subsystems/ai-runtime.md +++ b/docs/release-control/v6/internal/subsystems/ai-runtime.md @@ -108,7 +108,7 @@ runtime cost control, and shared AI transport surfaces. ## Completion Obligations -1. Update this contract when canonical AI runtime or transport entry points move, including transport-level provider request-shape changes such as DeepSeek `tool_choice` coercion +1. Update this contract when canonical AI runtime or transport entry points move, including transport-level provider request-shape changes such as DeepSeek `tool_choice` coercion, and runtime-failure classification splits (for example separating forced tool selection rejection, no tool-capable endpoint, and generic model-level lack of tool support into distinct causes) 2. Keep AI runtime and shared API proof routing aligned in `registry.json` 3. Preserve explicit coverage for chat, Patrol, remediation, and cost-control behavior when AI runtime changes Patrol runtime failures are part of that runtime contract: provider, model, diff --git a/internal/ai/patrol_assistant_handoff_test.go b/internal/ai/patrol_assistant_handoff_test.go index 7cc425eef..c5a6f43d7 100644 --- a/internal/ai/patrol_assistant_handoff_test.go +++ b/internal/ai/patrol_assistant_handoff_test.go @@ -52,7 +52,7 @@ func TestBuildPatrolRunAssistantHandoffUsesBackendSafeRunContext(t *testing.T) { "Run Type: Scoped run", "Trigger: Alert fired", "Runtime Failure: Selected model does not support Patrol tools", - "Provider rejected Patrol tool calls", + "no tool-capable endpoint", "Patrol Analysis: Visible runtime summary.", "Operator Boundary:", } { diff --git a/internal/ai/patrol_readiness.go b/internal/ai/patrol_readiness.go index 7b679d723..03a9e33f0 100644 --- a/internal/ai/patrol_readiness.go +++ b/internal/ai/patrol_readiness.go @@ -25,6 +25,8 @@ const ( PatrolFailureCauseModelProviderUnconfigured PatrolFailureCause = "model_provider_unconfigured" PatrolFailureCauseModelUnsupportedTools PatrolFailureCause = "model_unsupported_tools" PatrolFailureCauseModelToolSupportUnverified PatrolFailureCause = "model_tool_support_unverified" + PatrolFailureCauseToolChoiceRejected PatrolFailureCause = "tool_choice_rejected" + PatrolFailureCauseNoToolCapableEndpoint PatrolFailureCause = "no_tool_capable_endpoint" PatrolFailureCauseModelUnavailable PatrolFailureCause = "model_unavailable" PatrolFailureCauseContextWindowTooSmall PatrolFailureCause = "context_window_too_small" PatrolFailureCauseProviderBilling PatrolFailureCause = "provider_billing" diff --git a/internal/ai/patrol_runtime_failure.go b/internal/ai/patrol_runtime_failure.go index bebb8e465..73f8803f3 100644 --- a/internal/ai/patrol_runtime_failure.go +++ b/internal/ai/patrol_runtime_failure.go @@ -75,6 +75,31 @@ type PatrolRuntimeFailureDiagnostic struct { Recommendation string } +// patrolToolChoiceValueRejected reports whether the upstream error indicates +// the provider rejected the specific tool_choice value Pulse sent (for +// example, "deepseek-reasoner does not support this tool_choice"). This is +// distinct from the model truly lacking tool support: the model accepts +// tools but not the requested coercion. +func patrolToolChoiceValueRejected(lower string) bool { + if !strings.Contains(lower, "tool_choice") { + return false + } + return strings.Contains(lower, "does not support this tool_choice") || + strings.Contains(lower, "tool_choice is not supported") || + strings.Contains(lower, "tool_choice value is not supported") || + strings.Contains(lower, "invalid tool_choice") || + strings.Contains(lower, "unsupported tool_choice") +} + +// patrolNoToolCapableEndpoint reports whether the upstream error indicates +// the provider has no available endpoint that supports tools for the +// selected model. OpenRouter surfaces this as "No endpoints found that +// support tool use" when account-level provider or data-policy filters +// exclude every tool-capable route. +func patrolNoToolCapableEndpoint(lower string) bool { + return strings.Contains(lower, "no endpoints found") && strings.Contains(lower, "tool") +} + func ClassifyPatrolRuntimeFailure(err error) PatrolRuntimeFailureDiagnostic { failure := patrolRuntimeFailureFromError(err) return PatrolRuntimeFailureDiagnostic{ @@ -105,10 +130,21 @@ func patrolRuntimeFailureFromError(err error) patrolRuntimeFailure { } switch { + case patrolToolChoiceValueRejected(lower): + failure.Title = "Pulse Patrol: Provider rejected forced tool selection" + failure.Summary = "Provider rejected forced tool selection" + failure.Cause = PatrolFailureCauseToolChoiceRejected + failure.Description = "Pulse Patrol reached the provider and the model accepts tools, but the provider rejected the specific tool-selection coercion Pulse sent. This usually means the routed model accepts tools yet does not honour a request to force a particular tool, only automatic selection." + failure.Recommendation = "Pulse will retry with automatic tool selection on the next Patrol run. If the failure persists, switch Patrol to a different model or provider where forced tool selection is accepted, or report the model in question." + case patrolNoToolCapableEndpoint(lower): + failure.Title = "Pulse Patrol: No tool-capable provider endpoint available" + failure.Summary = "No tool-capable provider endpoint available" + failure.Cause = PatrolFailureCauseNoToolCapableEndpoint + failure.Description = "Pulse Patrol reached the provider, but the provider reports no available endpoint that supports tool calling for the selected model. For OpenRouter this typically reflects account-level provider or data-policy filters that exclude every tool-capable route, leaving only routes that do not support tools." + failure.Recommendation = "Review provider routing and privacy filters (for OpenRouter, the Privacy / Data Policy settings and per-model allowed providers), broaden the allowed providers, or switch Patrol to a model with broader tool support." case strings.Contains(lower, "tool_choice") || strings.Contains(lower, "tool calling") || - strings.Contains(lower, "tools are not supported") || - strings.Contains(lower, "no endpoints found") && strings.Contains(lower, "tool"): + strings.Contains(lower, "tools are not supported"): failure.Title = "Pulse Patrol: Selected model does not support Patrol tools" failure.Summary = "Selected model does not support Patrol tools" failure.Cause = PatrolFailureCauseModelUnsupportedTools @@ -206,10 +242,13 @@ func summarizePatrolRuntimeFailureDetail(raw string) string { } lower := strings.ToLower(raw) switch { + case patrolToolChoiceValueRejected(lower): + return "Provider rejected Pulse's forced tool selection. Pulse will retry with automatic tool selection on the next Patrol run." + case patrolNoToolCapableEndpoint(lower): + return "Provider has no tool-capable endpoint for the selected model. Review provider routing or privacy filters." case strings.Contains(lower, "tool_choice") || strings.Contains(lower, "tool calling") || - strings.Contains(lower, "tools are not supported") || - strings.Contains(lower, "no endpoints found") && strings.Contains(lower, "tool"): + strings.Contains(lower, "tools are not supported"): return "Provider rejected Patrol tool calls. Choose a Patrol model and endpoint with tool-call support." case strings.Contains(lower, "reasoning_content"): return "Provider rejected Patrol reasoning state. Retry with a provider route that supports the selected model's reasoning and tool protocol." diff --git a/internal/ai/patrol_runtime_failure_test.go b/internal/ai/patrol_runtime_failure_test.go index bf7641cf4..0da8fcb7e 100644 --- a/internal/ai/patrol_runtime_failure_test.go +++ b/internal/ai/patrol_runtime_failure_test.go @@ -41,29 +41,75 @@ func TestPatrolRuntimeFailureFromError_PopulatesImpactForAllCauses(t *testing.T) } } -func TestPatrolRuntimeFailureFromError_ClassifiesToolCallingUnsupported(t *testing.T) { +func TestPatrolRuntimeFailureFromError_ClassifiesNoToolCapableEndpoint(t *testing.T) { + // OpenRouter surfaces this when account-level provider/data filters + // exclude every tool-capable route for the selected model. err := errors.New(`agentic patrol failed: API error (404): {"error":{"message":"No endpoints found that support the provided 'tool_choice' value."}}`) failure := patrolRuntimeFailureFromError(err) + if failure.Title != "Pulse Patrol: No tool-capable provider endpoint available" { + t.Fatalf("unexpected title %q", failure.Title) + } + if failure.Summary != "No tool-capable provider endpoint available" { + t.Fatalf("unexpected summary %q", failure.Summary) + } + if failure.Cause != PatrolFailureCauseNoToolCapableEndpoint { + t.Fatalf("unexpected cause %q", failure.Cause) + } + if !strings.Contains(failure.Recommendation, "routing") && !strings.Contains(failure.Recommendation, "filters") { + t.Fatalf("expected recommendation to mention routing/filters, got %q", failure.Recommendation) + } + if strings.Contains(failure.Evidence, "tool_choice") || strings.Contains(failure.Evidence, "No endpoints found") { + t.Fatalf("evidence leaked raw provider detail: %q", failure.Evidence) + } + if !strings.Contains(failure.Evidence, "no tool-capable endpoint") { + t.Fatalf("expected evidence to keep safe classified detail, got %q", failure.Evidence) + } +} + +func TestPatrolRuntimeFailureFromError_ClassifiesToolChoiceValueRejected(t *testing.T) { + // Direct DeepSeek path: provider accepts tools but rejects forced + // tool_choice. Pre-fix this misclassified as "model does not support + // tools" and pointed operators at the wrong remediation. + err := errors.New(`agentic patrol failed: provider error: API error (400): deepseek-reasoner does not support this tool_choice`) + + failure := patrolRuntimeFailureFromError(err) + + if failure.Title != "Pulse Patrol: Provider rejected forced tool selection" { + t.Fatalf("unexpected title %q", failure.Title) + } + if failure.Summary != "Provider rejected forced tool selection" { + t.Fatalf("unexpected summary %q", failure.Summary) + } + if failure.Cause != PatrolFailureCauseToolChoiceRejected { + t.Fatalf("unexpected cause %q", failure.Cause) + } + if !strings.Contains(failure.Recommendation, "automatic tool selection") { + t.Fatalf("expected recommendation to mention automatic tool selection, got %q", failure.Recommendation) + } + if strings.Contains(failure.Evidence, "deepseek-reasoner") || strings.Contains(failure.Evidence, "API error (400)") { + t.Fatalf("evidence leaked raw provider detail: %q", failure.Evidence) + } + if !strings.Contains(failure.Evidence, "rejected") { + t.Fatalf("expected evidence to keep safe classified detail, got %q", failure.Evidence) + } +} + +func TestPatrolRuntimeFailureFromError_ClassifiesGenericToolUnsupported(t *testing.T) { + // Generic "tools are not supported" fallback for providers that say + // the model truly cannot call tools (not a value-rejection or routing + // problem). + err := errors.New(`API error (400): tools are not supported by this model family`) + + failure := patrolRuntimeFailureFromError(err) + if failure.Title != "Pulse Patrol: Selected model does not support Patrol tools" { t.Fatalf("unexpected title %q", failure.Title) } - if failure.Summary != "Selected model does not support Patrol tools" { - t.Fatalf("unexpected summary %q", failure.Summary) - } if failure.Cause != PatrolFailureCauseModelUnsupportedTools { t.Fatalf("unexpected cause %q", failure.Cause) } - if !strings.Contains(failure.Recommendation, "supports tool calling") { - t.Fatalf("expected recommendation to mention tool calling, got %q", failure.Recommendation) - } - if strings.Contains(failure.Evidence, "tool_choice") || strings.Contains(failure.Evidence, "No endpoints found") { - t.Fatalf("evidence leaked raw provider detail: %q", failure.Evidence) - } - if !strings.Contains(failure.Evidence, "Provider rejected Patrol tool calls") { - t.Fatalf("expected evidence to keep safe classified detail, got %q", failure.Evidence) - } } func TestPatrolRuntimeFailureFromError_ClassifiesUnavailableModel(t *testing.T) { @@ -205,13 +251,13 @@ func TestRunPatrolRecordsStructuredRuntimeFailure(t *testing.T) { if len(runs) != 1 { t.Fatalf("expected one patrol run, got %d", len(runs)) } - if runs[0].ErrorSummary != "Selected model does not support Patrol tools" { + if runs[0].ErrorSummary != "No tool-capable provider endpoint available" { t.Fatalf("expected structured run error summary, got %q", runs[0].ErrorSummary) } if strings.Contains(runs[0].ErrorDetail, "tool_choice") || strings.Contains(runs[0].ErrorDetail, "No endpoints found") { t.Fatalf("run error detail leaked raw provider message: %q", runs[0].ErrorDetail) } - if !strings.Contains(runs[0].ErrorDetail, "Provider rejected Patrol tool calls") { + if !strings.Contains(runs[0].ErrorDetail, "no tool-capable endpoint") { t.Fatalf("expected run error detail to preserve safe classified detail, got %q", runs[0].ErrorDetail) } @@ -219,10 +265,10 @@ func TestRunPatrolRecordsStructuredRuntimeFailure(t *testing.T) { if finding == nil { t.Fatal("expected Patrol runtime finding") } - if finding.Title != "Pulse Patrol: Selected model does not support Patrol tools" { + if finding.Title != "Pulse Patrol: No tool-capable provider endpoint available" { t.Fatalf("unexpected runtime finding title %q", finding.Title) } - if finding.FailureCause != string(PatrolFailureCauseModelUnsupportedTools) { + if finding.FailureCause != string(PatrolFailureCauseNoToolCapableEndpoint) { t.Fatalf("unexpected runtime finding cause %q", finding.FailureCause) } } @@ -245,7 +291,7 @@ func TestPatrolRunRecordJSONNormalizesRuntimeFailureDetail(t *testing.T) { t.Fatalf("marshaled run leaked raw provider detail %q: %s", raw, text) } } - if !strings.Contains(text, "Provider rejected Patrol tool calls") { + if !strings.Contains(text, "rejected") { t.Fatalf("expected safe classified detail in marshaled run, got %s", text) } } diff --git a/internal/api/ai_handler_test.go b/internal/api/ai_handler_test.go index c2b261d43..386b72643 100644 --- a/internal/api/ai_handler_test.go +++ b/internal/api/ai_handler_test.go @@ -822,7 +822,7 @@ func TestHandleChat_RehydratesPatrolRunHandoffContextFromBackend(t *testing.T) { assert.Contains(t, reqArg.HandoffContext, "Source: Pulse Patrol run history") assert.Contains(t, reqArg.HandoffContext, "Run ID: run-runtime-error") assert.Contains(t, reqArg.HandoffContext, "Runtime Failure: Selected model does not support Patrol tools") - assert.Contains(t, reqArg.HandoffContext, "Provider rejected Patrol tool calls") + assert.Contains(t, reqArg.HandoffContext, "no tool-capable endpoint") assert.Contains(t, reqArg.HandoffContext, "Patrol Analysis: Visible runtime summary.") assert.NotContains(t, reqArg.HandoffContext, "browser-authored stale context") assert.NotContains(t, reqArg.HandoffContext, "provider trace")