From f7992e8e781e43c3dbb36eba2ffeb7669fdfebc0 Mon Sep 17 00:00:00 2001 From: rcourtman Date: Thu, 7 May 2026 20:45:18 +0100 Subject: [PATCH] Return safe provider preflight diagnostics Classify Assistant and Patrol provider-test failures through the Patrol runtime failure taxonomy, redact secret-shaped provider evidence, and preserve safe recommendations in the settings shell. Refs #1463 --- .../v6/internal/subsystems/agent-lifecycle.md | 4 + .../v6/internal/subsystems/ai-runtime.md | 7 + .../v6/internal/subsystems/api-contracts.md | 11 ++ .../subsystems/frontend-primitives.md | 5 +- .../internal/subsystems/storage-recovery.md | 8 +- frontend-modern/src/api/__tests__/ai.test.ts | 34 +++- frontend-modern/src/api/ai.ts | 7 +- .../Settings/__tests__/AISettings.test.tsx | 10 +- .../__tests__/settingsArchitecture.test.ts | 15 +- .../components/Settings/aiSettingsModel.ts | 18 +- .../components/Settings/useAISettingsState.ts | 56 +++++- frontend-modern/src/types/ai.ts | 15 ++ internal/ai/patrol_runtime_failure.go | 57 +++++- internal/ai/patrol_runtime_failure_test.go | 30 +++ internal/api/ai_handlers.go | 87 ++++++--- internal/api/ai_handlers_test.go | 177 ++++++++++++++++-- .../52-ai-settings-provider-setup.spec.ts | 110 +++++++++++ 17 files changed, 583 insertions(+), 68 deletions(-) diff --git a/docs/release-control/v6/internal/subsystems/agent-lifecycle.md b/docs/release-control/v6/internal/subsystems/agent-lifecycle.md index 8dfe5315c..91111efd7 100644 --- a/docs/release-control/v6/internal/subsystems/agent-lifecycle.md +++ b/docs/release-control/v6/internal/subsystems/agent-lifecycle.md @@ -1111,6 +1111,10 @@ AI handlers add split scoped-trigger fields, recency labels, or trigger-state transport for Patrol, lifecycle-adjacent setup and fleet surfaces must treat those payloads as Patrol-only runtime context and must not reinterpret them as agent install readiness, enrollment health, or fleet-control state. +Provider preflight diagnostics on the same handler remain AI runtime readiness +context as well: lifecycle-adjacent setup and fleet surfaces may not treat +`provider_auth`, `provider_connection`, model-selection, or provider-settings +recommendations as agent registration, updater trust, or fleet-control health. That same shared AI handler dependency also assumes direct alert-investigation execution mode is AI/API-owned. Request-scoped `AutonomousMode:false` and `RequireCommandApproval:true` on `/api/ai/investigate-alert` are Assistant diff --git a/docs/release-control/v6/internal/subsystems/ai-runtime.md b/docs/release-control/v6/internal/subsystems/ai-runtime.md index 3b27090d4..16f634aab 100644 --- a/docs/release-control/v6/internal/subsystems/ai-runtime.md +++ b/docs/release-control/v6/internal/subsystems/ai-runtime.md @@ -59,6 +59,13 @@ runtime cost control, and shared AI transport surfaces. 2. Add or change canonical AI provider config, provider-scoped model selection, or runtime auth/base-URL defaults through `internal/config/ai.go` 3. Add or change Pulse Assistant request flow through `internal/api/ai_handler.go`, `frontend-modern/src/api/ai.ts`, and `frontend-modern/src/api/aiChat.ts` 4. Add or change Patrol, alert-analysis, or remediation transport through `internal/api/ai_handlers.go`, `internal/api/ai_intelligence_handlers.go`, and `frontend-modern/src/api/patrol.ts` + Provider preflight diagnostics returned from `internal/api/ai_handlers.go` + must reuse the Patrol runtime failure classifier in `internal/ai/` and + expose only safe operator-facing cause, summary, recommendation, model, and + action fields. Raw provider response bodies and transport errors may be + logged server-side or attached as redacted internal Patrol evidence where + governed, but they must not be returned through the browser provider-test + contract. 5. Add or change AI usage/cost dashboard presentation through `frontend-modern/src/components/AI/AICostDashboard.tsx` and `frontend-modern/src/utils/aiCostPresentation.ts` 6. Add or change AI provider, control-level, chat/session, or explore-state presentation through `frontend-modern/src/components/AI/Chat/`, `frontend-modern/src/utils/aiProviderPresentation.ts`, `frontend-modern/src/utils/aiProviderHealthPresentation.ts`, `frontend-modern/src/utils/aiControlLevelPresentation.ts`, `frontend-modern/src/utils/aiChatPresentation.ts`, `frontend-modern/src/utils/aiSessionDiffPresentation.ts`, and `frontend-modern/src/utils/aiExplorePresentation.ts` 7. Keep AI chat presentation helpers aligned through `frontend-modern/src/components/AI/Chat/` and the shared `frontend-modern/src/utils/textPresentation.ts` diff --git a/docs/release-control/v6/internal/subsystems/api-contracts.md b/docs/release-control/v6/internal/subsystems/api-contracts.md index 576226ff2..5353916c1 100644 --- a/docs/release-control/v6/internal/subsystems/api-contracts.md +++ b/docs/release-control/v6/internal/subsystems/api-contracts.md @@ -217,6 +217,17 @@ product API routes free of maintainer commercial analytics. runtime-failure boolean needed for drawer/session presentation, and run-specific fields remain reserved for `patrol_run`. 34. `internal/api/ai_handlers.go` shared with `ai-runtime`: AI settings and remediation handlers are both an AI runtime control surface and a canonical API payload contract boundary. + Provider test responses from `/api/ai/test` and provider-specific + `/api/ai/test/{provider}` preflight responses must return one safe + structured diagnostic envelope: `success`, `message`, optional `model`, + `cause`, `summary`, `recommendation`, and `action`, plus `provider` on the + provider-specific endpoint. + Failure payloads must use the AI runtime's Patrol failure-cause vocabulary + and safe remediation text instead of returning raw upstream provider errors, + while still leaving those raw details available only to server logs or + redacted governed internal Patrol evidence. The frontend API client and + settings shell must treat this payload as the canonical provider health + contract rather than parsing free-form provider error strings. 35. `internal/api/ai_intelligence_handlers.go` shared with `ai-runtime`: AI intelligence handlers are both an AI runtime control surface and a canonical API payload contract boundary. 36. `internal/api/config_setup_handlers.go` shared with `agent-lifecycle`: auto-register and setup handlers are both an agent lifecycle control surface and a canonical API payload contract boundary. That same shared boundary also owns reachable-host selection truth for canonical Proxmox registration: runtime callers may propose ordered `candidateHosts`, but the API contract must persist and echo the first candidate Pulse can actually reach instead of freezing the caller's rejected first preference into the stored node endpoint. diff --git a/docs/release-control/v6/internal/subsystems/frontend-primitives.md b/docs/release-control/v6/internal/subsystems/frontend-primitives.md index 7c731e3fe..1ea154ec4 100644 --- a/docs/release-control/v6/internal/subsystems/frontend-primitives.md +++ b/docs/release-control/v6/internal/subsystems/frontend-primitives.md @@ -732,7 +732,10 @@ frontend primitive boundary. configuration rather than as a generic `AI Services` shell. Settings-save feedback must preserve provider-specific preflight failures and successful save responses that carry Patrol readiness warnings, including the provider, - selected Patrol model, and readiness summary when those fields are present. + selected Patrol model, failure cause, safe recommendation, and readiness + summary when those fields are present. The settings shell may compose that + safe backend diagnostic for display, but it must not infer provider + remediation by parsing raw upstream error strings in the browser. Runtime controls inside `frontend-modern/src/components/Settings/AIRuntimeControlsSection.tsx` must likewise describe discovery as workload discovery that supplies concrete service context to Pulse Assistant and Patrol, not as a generic diff --git a/docs/release-control/v6/internal/subsystems/storage-recovery.md b/docs/release-control/v6/internal/subsystems/storage-recovery.md index e1bf3e91a..bda2bf167 100644 --- a/docs/release-control/v6/internal/subsystems/storage-recovery.md +++ b/docs/release-control/v6/internal/subsystems/storage-recovery.md @@ -400,7 +400,7 @@ bypass the API fail-closed execution gate. hosted diagnostics do not collapse into false free-tier behavior. 19. Preserve shipped local security-doc guidance in shared `internal/api/` config/setup helpers so storage- and recovery-adjacent transport surfaces do not reintroduce GitHub `main` security links when the running build already serves its own local security documentation route. 20. Keep shared `internal/api/` Patrol transport and alert-trigger edits feature-isolated: Patrol-specific recency fields, callback fan-out, or alert-bridge wiring changes must not leak into recovery queries, storage links, or recovery-adjacent install/setup flows unless this contract changes in the same slice. - The same adjacency rule applies to AI settings transport in `internal/api/ai_handlers.go`: provider auth state, masked-secret payload fields, and provider-test model selection remain AI/runtime plus API-contract concerns and must not be absorbed into storage/recovery transport ownership just because those handlers live under the shared backend API tree. + The same adjacency rule applies to AI settings transport in `internal/api/ai_handlers.go`: provider auth state, masked-secret payload fields, provider-test model selection, and safe provider preflight diagnostics remain AI/runtime plus API-contract concerns and must not be absorbed into storage/recovery transport ownership just because those handlers live under the shared backend API tree. Direct alert-investigation execution controls in `internal/api/ai_handlers.go` follow that same split: request-scoped `AutonomousMode:false` and `RequireCommandApproval:true` are AI action-governance constraints, not @@ -2782,6 +2782,12 @@ shared AI/runtime wiring and the poller's provider selection path, but storage and recovery surfaces must not grow a second recovery-local config transport or provider-shaped configuration payload just because those reads can inform operator investigation. +Provider preflight diagnostics returned by shared AI settings handlers are the +same AI runtime readiness context. Storage and recovery surfaces may use the +resulting safe recommendation to direct an operator back to Assistant & Patrol +settings, but they must not reinterpret provider auth, provider connection, or +model-selection causes as recovery-source health, backup readiness, or +storage-control capability. That bounded projection is the current TrueNAS floor for storage and recovery: operators can inspect TrueNAS pools, datasets, disks, snapshots, and replication artifacts through the shared storage and recovery pages plus diff --git a/frontend-modern/src/api/__tests__/ai.test.ts b/frontend-modern/src/api/__tests__/ai.test.ts index dd7c81f7f..bc9005f97 100644 --- a/frontend-modern/src/api/__tests__/ai.test.ts +++ b/frontend-modern/src/api/__tests__/ai.test.ts @@ -120,6 +120,27 @@ describe('AIAPI', () => { }); }); + it('returns provider preflight diagnostics without narrowing the API payload', async () => { + const diagnostic = { + success: false, + message: 'Provider authentication issue', + provider: 'openrouter', + model: 'openrouter:deepseek/deepseek-r1', + cause: 'provider_auth', + summary: + 'Pulse Patrol cannot analyze your infrastructure because the provider rejected the configured credentials or account access.', + recommendation: + 'Check the API key or provider authentication in Patrol provider settings, then rerun Patrol.', + action: 'open_provider_settings', + }; + apiFetchJSONMock.mockResolvedValueOnce(diagnostic as any); + + await expect(AIAPI.testProvider('openrouter')).resolves.toMatchObject(diagnostic); + expect(apiFetchJSONMock).toHaveBeenCalledWith('/api/ai/test/openrouter', { + method: 'POST', + }); + }); + it('fetches canonical intelligence summaries with encoded resource ids', async () => { apiFetchJSONMock.mockResolvedValueOnce({} as any); await AIAPI.getIntelligenceSummary(); @@ -139,9 +160,12 @@ describe('AIAPI', () => { }); it('treats 402 responses from optional AI paywalled collections as empty state', async () => { - const paymentRequiredError = Object.assign(new Error('Approval management requires Pulse Pro'), { - status: 402, - }); + const paymentRequiredError = Object.assign( + new Error('Approval management requires Pulse Pro'), + { + status: 402, + }, + ); apiFetchJSONMock.mockRejectedValueOnce(paymentRequiredError); await expect(AIAPI.getPendingApprovals()).resolves.toEqual([]); @@ -179,7 +203,9 @@ describe('AIAPI', () => { }); it('does not swallow non-404 investigation lookup failures', async () => { - apiFetchJSONMock.mockRejectedValueOnce(Object.assign(new Error('Payment Required'), { status: 402 })); + apiFetchJSONMock.mockRejectedValueOnce( + Object.assign(new Error('Payment Required'), { status: 402 }), + ); await expect(AIAPI.getInvestigation('finding-2')).rejects.toThrow('Payment Required'); apiFetchJSONMock.mockRejectedValueOnce(new Error('backend down')); diff --git a/frontend-modern/src/api/ai.ts b/frontend-modern/src/api/ai.ts index b9e804265..d01e65b34 100644 --- a/frontend-modern/src/api/ai.ts +++ b/frontend-modern/src/api/ai.ts @@ -13,6 +13,7 @@ import type { AISettings, AISettingsUpdateRequest, AITestResult, + AIProviderTestResult, AIExecuteResponse, AIStreamEvent, AICostSummary, @@ -49,12 +50,10 @@ export class AIAPI { } // Test a specific provider connection - static async testProvider( - provider: string, - ): Promise<{ success: boolean; message: string; provider: string }> { + static async testProvider(provider: string): Promise { return apiFetchJSON(`${this.baseUrl}/ai/test/${encodeURIComponent(provider)}`, { method: 'POST', - }) as Promise<{ success: boolean; message: string; provider: string }>; + }) as Promise; } // Get available models from the AI provider diff --git a/frontend-modern/src/components/Settings/__tests__/AISettings.test.tsx b/frontend-modern/src/components/Settings/__tests__/AISettings.test.tsx index 4991f11e7..d7bbf5827 100644 --- a/frontend-modern/src/components/Settings/__tests__/AISettings.test.tsx +++ b/frontend-modern/src/components/Settings/__tests__/AISettings.test.tsx @@ -514,9 +514,12 @@ describe('AISettings provider save failure context', () => { testProviderMock.mockImplementation(async (provider: string) => ({ success: provider !== 'openrouter', message: + provider === 'openrouter' ? 'Provider authentication issue' : `${provider} reachable`, + recommendation: provider === 'openrouter' - ? 'OpenRouter returned 401 during provider preflight' - : `${provider} reachable`, + ? 'Check the API key or provider authentication in Patrol provider settings, then rerun Patrol.' + : undefined, + cause: provider === 'openrouter' ? 'provider_auth' : undefined, provider, })); updateSettingsMock.mockRejectedValue(new Error('Unable to save Assistant & Patrol settings.')); @@ -536,7 +539,8 @@ describe('AISettings provider save failure context', () => { }); const message = String(notificationErrorMock.mock.calls.at(-1)?.[0] ?? ''); expect(message).toContain('model openrouter:deepseek/deepseek-r1'); - expect(message).toContain('OpenRouter returned 401 during provider preflight'); + expect(message).toContain('Provider authentication issue'); + expect(message).toContain('Check the API key or provider authentication'); expect(message).toContain('Unable to save Assistant & Patrol settings.'); }); diff --git a/frontend-modern/src/components/Settings/__tests__/settingsArchitecture.test.ts b/frontend-modern/src/components/Settings/__tests__/settingsArchitecture.test.ts index 25f8fc5fb..8a7e1a76c 100644 --- a/frontend-modern/src/components/Settings/__tests__/settingsArchitecture.test.ts +++ b/frontend-modern/src/components/Settings/__tests__/settingsArchitecture.test.ts @@ -213,6 +213,16 @@ describe('settings architecture guardrails', () => { ); }); + it('keeps Assistant and Patrol provider diagnostics backend-owned in settings state', () => { + expect(aiSettingsModelSource).toContain( + 'export type ProviderTestResult = AIProviderTestResult', + ); + expect(aiSettingsStateSource).toContain('getProviderTestDiagnosticMessage(result)'); + expect(aiSettingsStateSource).toContain('recommendation: result.recommendation'); + expect(aiSettingsStateSource).toContain('providerHealth[erroredCandidate].message'); + expect(aiSettingsStateSource).not.toContain('OpenRouter returned 401'); + }); + it('keeps contextual settings feature gates free of retired commercial telemetry wrappers', () => { for (const source of [ agentProfilesPanelSource, @@ -385,9 +395,8 @@ describe('settings architecture guardrails', () => { expect(aiModelSelectionSectionSource).not.toContain('([]);', diff --git a/frontend-modern/src/components/Settings/aiSettingsModel.ts b/frontend-modern/src/components/Settings/aiSettingsModel.ts index db01d888e..5775a0503 100644 --- a/frontend-modern/src/components/Settings/aiSettingsModel.ts +++ b/frontend-modern/src/components/Settings/aiSettingsModel.ts @@ -1,7 +1,12 @@ import type { SelectionCardOption } from '@/components/shared/SelectionCardGroup'; import type { AIProviderHealthStatus } from '@/utils/aiProviderHealthPresentation'; import { getProviderFromModelId } from '@/utils/aiProviderPresentation'; -import type { AIProvider, AISettings as AISettingsType, ModelInfo } from '@/types/ai'; +import type { + AIProvider, + AIProviderTestResult, + AISettings as AISettingsType, + ModelInfo, +} from '@/types/ai'; export type AIProviderCredentialsFormState = { anthropicApiKey: string; @@ -37,13 +42,14 @@ export type AIProviderConfig = { export type ProviderHealthState = { status: AIProviderHealthStatus; message: string; + model?: string; + cause?: string; + summary?: string; + recommendation?: string; + action?: string; }; -export type ProviderTestResult = { - provider: AIProvider; - success: boolean; - message: string; -}; +export type ProviderTestResult = AIProviderTestResult; export type AIAvailableModel = ModelInfo; diff --git a/frontend-modern/src/components/Settings/useAISettingsState.ts b/frontend-modern/src/components/Settings/useAISettingsState.ts index 66ebce7a6..fd1bbe892 100644 --- a/frontend-modern/src/components/Settings/useAISettingsState.ts +++ b/frontend-modern/src/components/Settings/useAISettingsState.ts @@ -65,6 +65,12 @@ const compactUnique = (values: Array): string[] => { return result; }; +const getProviderTestDiagnosticMessage = (result: { + message?: string; + recommendation?: string; +}): string => + compactUnique([result.message, result.recommendation]).join(' ยท ') || 'Connection test failed'; + const isKnownAIProvider = (provider: string): provider is AIProvider => AI_PROVIDERS.includes(provider as AIProvider); @@ -453,14 +459,25 @@ export const useAISettingsState = () => { ): Promise => { try { const result = await AIAPI.testProvider(provider); + const message = getProviderTestDiagnosticMessage(result); const normalizedResult: ProviderTestResult = { provider, success: result.success, - message: result.message, + message, + model: result.model, + cause: result.cause, + summary: result.summary, + recommendation: result.recommendation, + action: result.action, }; setProviderHealth(provider, { status: result.success ? 'ok' : 'error', - message: result.message || '', + message, + model: result.model, + cause: result.cause, + summary: result.summary, + recommendation: result.recommendation, + action: result.action, }); if (opts.storeManualResult) { setProviderTestResult(normalizedResult); @@ -476,7 +493,15 @@ export const useAISettingsState = () => { } catch (error) { const message = error instanceof Error ? error.message : 'Connection test failed'; const result: ProviderTestResult = { provider, success: false, message }; - setProviderHealth(provider, { status: 'error', message }); + setProviderHealth(provider, { + status: 'error', + message, + model: undefined, + cause: undefined, + summary: undefined, + recommendation: undefined, + action: undefined, + }); if (opts.storeManualResult) { setProviderTestResult(result); } @@ -498,7 +523,15 @@ export const useAISettingsState = () => { ); for (const provider of AI_PROVIDERS) { if (!configuredProviders.includes(provider)) { - setProviderHealth(provider, { status: 'not_configured', message: '' }); + setProviderHealth(provider, { + status: 'not_configured', + message: '', + model: undefined, + cause: undefined, + summary: undefined, + recommendation: undefined, + action: undefined, + }); } } if (configuredProviders.length === 0) { @@ -510,7 +543,15 @@ export const useAISettingsState = () => { try { await Promise.all( configuredProviders.map(async (provider) => { - setProviderHealth(provider, { status: 'checking', message: '' }); + setProviderHealth(provider, { + status: 'checking', + message: '', + model: undefined, + cause: undefined, + summary: undefined, + recommendation: undefined, + action: undefined, + }); await checkProviderHealth(provider, { notify: false, storeManualResult: false }); }), ); @@ -859,10 +900,11 @@ export const useAISettingsState = () => { setTesting(true); try { const result = await AIAPI.testConnection(); + const message = getProviderTestDiagnosticMessage(result); if (result.success) { - notificationStore.success(result.message); + notificationStore.success(message); } else { - notificationStore.error(result.message); + notificationStore.error(message); } } catch (error) { logger.error('[AISettings] Test failed:', error); diff --git a/frontend-modern/src/types/ai.ts b/frontend-modern/src/types/ai.ts index f925fae96..d1cfe5f51 100644 --- a/frontend-modern/src/types/ai.ts +++ b/frontend-modern/src/types/ai.ts @@ -131,6 +131,21 @@ export interface AITestResult { success: boolean; message: string; model?: string; + cause?: string; + summary?: string; + recommendation?: string; + action?: string; +} + +export interface AIProviderTestResult { + success: boolean; + message: string; + provider: AIProvider; + model?: string; + cause?: string; + summary?: string; + recommendation?: string; + action?: string; } // Provider descriptions diff --git a/internal/ai/patrol_runtime_failure.go b/internal/ai/patrol_runtime_failure.go index 8155459c3..02c92f6fd 100644 --- a/internal/ai/patrol_runtime_failure.go +++ b/internal/ai/patrol_runtime_failure.go @@ -2,6 +2,7 @@ package ai import ( "fmt" + "regexp" "strings" "time" ) @@ -9,6 +10,28 @@ import ( const patrolRuntimeFailureDetailLimit = 2000 const patrolProviderNotConfiguredReason = "Patrol provider not configured - open Assistant & Patrol provider settings, configure a provider, and choose a Patrol model that supports tools" +var patrolRuntimeFailureDetailRedactors = []struct { + pattern *regexp.Regexp + replacement string +}{ + { + pattern: regexp.MustCompile(`(?i)([?&](?:key|api_key|apikey|access_token|token)=)[^\s&"']+`), + replacement: `${1}[redacted]`, + }, + { + pattern: regexp.MustCompile(`(?i)("(?:api[_-]?key|apikey|access[_-]?token|token|authorization|x-api-key)"\s*:\s*")[^"]+`), + replacement: `${1}[redacted]`, + }, + { + pattern: regexp.MustCompile(`(?i)((?:authorization:\s*bearer|x-api-key:)\s+)[^\s,;]+`), + replacement: `${1}[redacted]`, + }, + { + pattern: regexp.MustCompile(`(?i)(https?://)[^\s/@:]+:[^\s/@]+@`), + replacement: `${1}[redacted]@`, + }, +} + type patrolRuntimeFailure struct { Title string Summary string @@ -19,12 +42,31 @@ type patrolRuntimeFailure struct { Evidence string } +type PatrolRuntimeFailureDiagnostic struct { + Title string + Summary string + Cause PatrolFailureCause + Description string + Recommendation string +} + +func ClassifyPatrolRuntimeFailure(err error) PatrolRuntimeFailureDiagnostic { + failure := patrolRuntimeFailureFromError(err) + return PatrolRuntimeFailureDiagnostic{ + Title: failure.Title, + Summary: failure.Summary, + Cause: failure.Cause, + Description: failure.Description, + Recommendation: failure.Recommendation, + } +} + func patrolRuntimeFailureFromError(err error) patrolRuntimeFailure { raw := "" if err != nil { raw = strings.TrimSpace(err.Error()) } - detail := truncateString(raw, patrolRuntimeFailureDetailLimit) + detail := truncateString(redactPatrolRuntimeFailureDetail(raw), patrolRuntimeFailureDetailLimit) lower := strings.ToLower(raw) failure := patrolRuntimeFailure{ @@ -90,6 +132,7 @@ func patrolRuntimeFailureFromError(err error) patrolRuntimeFailure { failure.Description = "Pulse Patrol cannot analyze your infrastructure because the provider rejected the configured credentials or account access." failure.Recommendation = "Check the API key or provider authentication in Patrol provider settings, then rerun Patrol." case strings.Contains(lower, "not configured") || + strings.Contains(lower, "no provider configured") || strings.Contains(lower, "chat service not available") || strings.Contains(lower, "provider not available") || strings.Contains(lower, "failed to create provider"): @@ -103,7 +146,9 @@ func patrolRuntimeFailureFromError(err error) patrolRuntimeFailure { strings.Contains(lower, "no such host") || strings.Contains(lower, "i/o timeout") || strings.Contains(lower, "context deadline exceeded") || - strings.Contains(lower, "timeout"): + strings.Contains(lower, "timeout") || + strings.Contains(lower, "returned status 5") || + strings.Contains(lower, "api error (5"): failure.Title = "Pulse Patrol: Provider connection issue" failure.Summary = "Provider connection issue" failure.Cause = PatrolFailureCauseProviderConnection @@ -118,6 +163,14 @@ func patrolRuntimeFailureFromError(err error) patrolRuntimeFailure { return failure } +func redactPatrolRuntimeFailureDetail(raw string) string { + redacted := raw + for _, redactor := range patrolRuntimeFailureDetailRedactors { + redacted = redactor.pattern.ReplaceAllString(redacted, redactor.replacement) + } + return redacted +} + func newPatrolRuntimeFailureFinding(failure patrolRuntimeFailure, now time.Time) *Finding { return &Finding{ ID: generateFindingID(patrolRuntimeResourceID, "reliability", patrolRuntimeFindingKey), diff --git a/internal/ai/patrol_runtime_failure_test.go b/internal/ai/patrol_runtime_failure_test.go index 831c5115b..1244dc109 100644 --- a/internal/ai/patrol_runtime_failure_test.go +++ b/internal/ai/patrol_runtime_failure_test.go @@ -53,6 +53,36 @@ func TestPatrolRuntimeFailureFromError_ClassifiesUnavailableModel(t *testing.T) } } +func TestClassifyPatrolRuntimeFailureOmitsRawProviderEvidence(t *testing.T) { + diagnostic := ClassifyPatrolRuntimeFailure(errors.New(`API error (401): {"error":"raw upstream credential body"}`)) + + if diagnostic.Summary != "Provider authentication issue" { + t.Fatalf("unexpected summary %q", diagnostic.Summary) + } + if diagnostic.Cause != PatrolFailureCauseProviderAuth { + t.Fatalf("unexpected cause %q", diagnostic.Cause) + } + if strings.Contains(diagnostic.Description, "raw upstream credential body") { + t.Fatalf("description leaked raw provider detail: %q", diagnostic.Description) + } + if strings.Contains(diagnostic.Recommendation, "raw upstream credential body") { + t.Fatalf("recommendation leaked raw provider detail: %q", diagnostic.Recommendation) + } +} + +func TestPatrolRuntimeFailureFromErrorRedactsSecretLikeDetail(t *testing.T) { + failure := patrolRuntimeFailureFromError(errors.New(`request failed: Get "https://generativelanguage.googleapis.com/v1beta/models?key=AIzaSy-secret-token": Authorization: Bearer sk-live-secret {"api_key":"sk-json-secret"} https://user:pass@example.test/v1`)) + + for _, secret := range []string{"AIzaSy-secret-token", "sk-live-secret", "sk-json-secret", "user:pass@"} { + if strings.Contains(failure.Evidence, secret) { + t.Fatalf("evidence leaked secret-shaped detail %q: %s", secret, failure.Evidence) + } + } + if !strings.Contains(failure.Evidence, "[redacted]") { + t.Fatalf("expected evidence to retain redacted context, got %q", failure.Evidence) + } +} + func TestPatrolRuntimeFailureFromError_DefaultIsActionable(t *testing.T) { failure := patrolRuntimeFailureFromError(errors.New("upstream returned unexpected eof")) diff --git a/internal/api/ai_handlers.go b/internal/api/ai_handlers.go index c4e448101..1a8fa520b 100644 --- a/internal/api/ai_handlers.go +++ b/internal/api/ai_handlers.go @@ -2857,22 +2857,34 @@ func (h *AISettingsHandler) HandleTestAIConnection(w http.ResponseWriter, r *htt defer cancel() var testResult struct { - Success bool `json:"success"` - Message string `json:"message"` - Model string `json:"model,omitempty"` + Success bool `json:"success"` + Message string `json:"message"` + Model string `json:"model,omitempty"` + Cause string `json:"cause,omitempty"` + Summary string `json:"summary,omitempty"` + Recommendation string `json:"recommendation,omitempty"` + Action string `json:"action,omitempty"` } + cfg := h.GetAIService(r.Context()).GetConfig() err := h.GetAIService(r.Context()).TestConnection(ctx) if err != nil { + diagnostic := ai.ClassifyPatrolRuntimeFailure(err) testResult.Success = false - testResult.Message = "Connection test failed" + testResult.Message = diagnostic.Summary + testResult.Cause = string(diagnostic.Cause) + testResult.Summary = diagnostic.Description + testResult.Recommendation = diagnostic.Recommendation + testResult.Action = "open_provider_settings" + if cfg != nil { + testResult.Model = config.NormalizeQuickstartModelString(cfg.GetModel()) + } log.Error().Err(err).Msg("AI connection test failed") } else { - cfg := h.GetAIService(r.Context()).GetConfig() testResult.Success = true testResult.Message = "Connection successful" if cfg != nil { - testResult.Model = cfg.GetModel() + testResult.Model = config.NormalizeQuickstartModelString(cfg.GetModel()) } } @@ -2881,6 +2893,47 @@ func (h *AISettingsHandler) HandleTestAIConnection(w http.ResponseWriter, r *htt } } +type aiProviderTestResponse struct { + Success bool `json:"success"` + Message string `json:"message"` + Provider string `json:"provider"` + Model string `json:"model,omitempty"` + Cause string `json:"cause,omitempty"` + Summary string `json:"summary,omitempty"` + Recommendation string `json:"recommendation,omitempty"` + Action string `json:"action,omitempty"` +} + +func newAIProviderTestResponse(provider string) aiProviderTestResponse { + return aiProviderTestResponse{Provider: provider} +} + +func newAIProviderTestNotConfiguredResponse(provider string) aiProviderTestResponse { + return aiProviderTestResponse{ + Success: false, + Message: "Provider not configured", + Provider: provider, + Cause: string(ai.PatrolFailureCauseProviderNotConfigured), + Summary: "Pulse Patrol cannot test this provider because it is not configured for the current Assistant & Patrol settings.", + Recommendation: "Open Assistant & Patrol provider settings, configure the provider credentials or base URL, choose a Patrol model, and run provider preflight again.", + Action: "open_provider_settings", + } +} + +func newAIProviderTestFailureResponse(provider, model string, err error) aiProviderTestResponse { + diagnostic := ai.ClassifyPatrolRuntimeFailure(err) + return aiProviderTestResponse{ + Success: false, + Message: diagnostic.Summary, + Provider: provider, + Model: config.NormalizeQuickstartModelString(model), + Cause: string(diagnostic.Cause), + Summary: diagnostic.Description, + Recommendation: diagnostic.Recommendation, + Action: "open_provider_settings", + } +} + // HandleTestProvider tests a specific AI provider connection (POST /api/ai/test/:provider) // Auth is enforced by RequirePermission middleware at route registration; with // default authorizer, non-admin proxy users are hard-denied (with RBAC, deferred @@ -2914,17 +2967,12 @@ func (h *AISettingsHandler) HandleTestProvider(w http.ResponseWriter, r *http.Re ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) defer cancel() - var testResult struct { - Success bool `json:"success"` - Message string `json:"message"` - Provider string `json:"provider"` - } - testResult.Provider = provider + testResult := newAIProviderTestResponse(provider) // Load config and create provider for testing cfg := h.GetAIService(r.Context()).GetConfig() if cfg == nil { - testResult.Success = false + testResult = newAIProviderTestNotConfiguredResponse(provider) testResult.Message = "Pulse Assistant not configured" if err := utils.WriteJSONResponse(w, testResult); err != nil { log.Error().Err(err).Msg("failed to write JSON response") @@ -2934,8 +2982,7 @@ func (h *AISettingsHandler) HandleTestProvider(w http.ResponseWriter, r *http.Re // Check if provider is configured if !cfg.HasProvider(provider) { - testResult.Success = false - testResult.Message = "Provider not configured" + testResult = newAIProviderTestNotConfiguredResponse(provider) if err := utils.WriteJSONResponse(w, testResult); err != nil { log.Error().Err(err).Msg("failed to write JSON response") } @@ -2945,8 +2992,7 @@ func (h *AISettingsHandler) HandleTestProvider(w http.ResponseWriter, r *http.Re // Create provider and test connection model, err := ai.ResolvePreferredModelForProvider(ctx, cfg, provider) if err != nil { - testResult.Success = false - testResult.Message = "Failed to resolve provider model" + testResult = newAIProviderTestFailureResponse(provider, "", err) log.Error().Err(err).Str("provider", provider).Msg("AI provider model resolution failed") if err := utils.WriteJSONResponse(w, testResult); err != nil { log.Error().Err(err).Msg("failed to write provider test response") @@ -2956,8 +3002,7 @@ func (h *AISettingsHandler) HandleTestProvider(w http.ResponseWriter, r *http.Re testProvider, err := providers.NewForProvider(cfg, provider, model) if err != nil { - testResult.Success = false - testResult.Message = "Failed to create provider" + testResult = newAIProviderTestFailureResponse(provider, model, err) log.Error().Err(err).Str("provider", provider).Msg("AI provider creation failed") if err := utils.WriteJSONResponse(w, testResult); err != nil { log.Error().Err(err).Msg("failed to write JSON response") @@ -2967,12 +3012,12 @@ func (h *AISettingsHandler) HandleTestProvider(w http.ResponseWriter, r *http.Re err = testProvider.TestConnection(ctx) if err != nil { - testResult.Success = false - testResult.Message = "Connection test failed" + testResult = newAIProviderTestFailureResponse(provider, model, err) log.Error().Err(err).Str("provider", provider).Msg("AI provider connection test failed") } else { testResult.Success = true testResult.Message = "Connection successful" + testResult.Model = config.NormalizeQuickstartModelString(model) } if err := utils.WriteJSONResponse(w, testResult); err != nil { diff --git a/internal/api/ai_handlers_test.go b/internal/api/ai_handlers_test.go index ed9ab5480..711a219fa 100644 --- a/internal/api/ai_handlers_test.go +++ b/internal/api/ai_handlers_test.go @@ -1215,12 +1215,23 @@ func TestAISettingsHandler_TestConnection_Failure(t *testing.T) { require.Equal(t, http.StatusOK, rec.Code) var resp struct { - Success bool `json:"success"` - Message string `json:"message"` + Success bool `json:"success"` + Message string `json:"message"` + Model string `json:"model"` + Cause string `json:"cause"` + Summary string `json:"summary"` + Recommendation string `json:"recommendation"` + Action string `json:"action"` } require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) assert.False(t, resp.Success) - assert.Equal(t, "Connection test failed", resp.Message) + assert.Equal(t, "Provider connection issue", resp.Message) + assert.Equal(t, "ollama:llama3", resp.Model) + assert.Equal(t, string(ai.PatrolFailureCauseProviderConnection), resp.Cause) + assert.Contains(t, resp.Summary, "healthy connection") + assert.Contains(t, resp.Recommendation, "Check provider reachability") + assert.Equal(t, "open_provider_settings", resp.Action) + assert.NotContains(t, rec.Body.String(), "Ollama returned status 500") } func TestAISettingsHandler_TestConnection_NoConfig(t *testing.T) { @@ -1241,12 +1252,16 @@ func TestAISettingsHandler_TestConnection_NoConfig(t *testing.T) { require.Equal(t, http.StatusOK, rec.Code) var resp struct { - Success bool `json:"success"` - Message string `json:"message"` + Success bool `json:"success"` + Message string `json:"message"` + Cause string `json:"cause"` + Recommendation string `json:"recommendation"` } require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) assert.False(t, resp.Success) - assert.Equal(t, "Connection test failed", resp.Message) + assert.Equal(t, "Provider not ready", resp.Message) + assert.Equal(t, string(ai.PatrolFailureCauseProviderNotConfigured), resp.Cause) + assert.Contains(t, resp.Recommendation, "provider settings") } // ======================================== @@ -1293,14 +1308,20 @@ func TestAISettingsHandler_TestProvider_NotConfigured(t *testing.T) { require.Equal(t, http.StatusOK, rec.Code) var resp struct { - Success bool `json:"success"` - Message string `json:"message"` - Provider string `json:"provider"` + Success bool `json:"success"` + Message string `json:"message"` + Provider string `json:"provider"` + Cause string `json:"cause"` + Recommendation string `json:"recommendation"` + Action string `json:"action"` } require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) assert.False(t, resp.Success) assert.Equal(t, "Provider not configured", resp.Message) assert.Equal(t, "openai", resp.Provider) + assert.Equal(t, string(ai.PatrolFailureCauseProviderNotConfigured), resp.Cause) + assert.Contains(t, resp.Recommendation, "configure the provider") + assert.Equal(t, "open_provider_settings", resp.Action) } func TestAISettingsHandler_TestProvider_NoAIConfig(t *testing.T) { @@ -1322,14 +1343,20 @@ func TestAISettingsHandler_TestProvider_NoAIConfig(t *testing.T) { require.Equal(t, http.StatusOK, rec.Code) var resp struct { - Success bool `json:"success"` - Message string `json:"message"` - Provider string `json:"provider"` + Success bool `json:"success"` + Message string `json:"message"` + Provider string `json:"provider"` + Cause string `json:"cause"` + Recommendation string `json:"recommendation"` + Action string `json:"action"` } require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) assert.False(t, resp.Success) assert.Equal(t, "Provider not configured", resp.Message) assert.Equal(t, "ollama", resp.Provider) + assert.Equal(t, string(ai.PatrolFailureCauseProviderNotConfigured), resp.Cause) + assert.Contains(t, resp.Recommendation, "provider settings") + assert.Equal(t, "open_provider_settings", resp.Action) } func TestAISettingsHandler_TestProvider_ConnectionFailure(t *testing.T) { @@ -1362,14 +1389,132 @@ func TestAISettingsHandler_TestProvider_ConnectionFailure(t *testing.T) { require.Equal(t, http.StatusOK, rec.Code) var resp struct { - Success bool `json:"success"` - Message string `json:"message"` - Provider string `json:"provider"` + Success bool `json:"success"` + Message string `json:"message"` + Provider string `json:"provider"` + Model string `json:"model"` + Cause string `json:"cause"` + Summary string `json:"summary"` + Recommendation string `json:"recommendation"` + Action string `json:"action"` } require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) assert.False(t, resp.Success) - assert.Equal(t, "Connection test failed", resp.Message) + assert.Equal(t, "Provider connection issue", resp.Message) assert.Equal(t, "ollama", resp.Provider) + assert.Equal(t, "ollama:llama3", resp.Model) + assert.Equal(t, string(ai.PatrolFailureCauseProviderConnection), resp.Cause) + assert.Contains(t, resp.Summary, "healthy connection") + assert.Contains(t, resp.Recommendation, "Check provider reachability") + assert.Equal(t, "open_provider_settings", resp.Action) + assert.NotContains(t, rec.Body.String(), "Ollama returned status 500") +} + +func TestAISettingsHandler_TestProvider_AuthFailureUsesSafeDiagnostic(t *testing.T) { + t.Parallel() + + ollama := newIPv4HTTPServer(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + })) + defer ollama.Close() + + tmp := t.TempDir() + cfg := &config.Config{DataPath: tmp} + persistence := config.NewConfigPersistence(tmp) + + aiCfg := config.NewDefaultAIConfig() + aiCfg.Enabled = true + aiCfg.Model = "ollama:llama3" + aiCfg.OllamaBaseURL = ollama.URL + if err := persistence.SaveAIConfig(*aiCfg); err != nil { + t.Fatalf("SaveAIConfig: %v", err) + } + + handler := newTestAISettingsHandler(cfg, persistence, nil) + + req := newLoopbackRequest(http.MethodPost, "/api/ai/test/ollama", nil) + rec := httptest.NewRecorder() + handler.HandleTestProvider(rec, req) + + require.Equal(t, http.StatusOK, rec.Code) + + var resp struct { + Success bool `json:"success"` + Message string `json:"message"` + Provider string `json:"provider"` + Model string `json:"model"` + Cause string `json:"cause"` + Summary string `json:"summary"` + Recommendation string `json:"recommendation"` + Action string `json:"action"` + } + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) + assert.False(t, resp.Success) + assert.Equal(t, "Provider authentication issue", resp.Message) + assert.Equal(t, "ollama", resp.Provider) + assert.Equal(t, "ollama:llama3", resp.Model) + assert.Equal(t, string(ai.PatrolFailureCauseProviderAuth), resp.Cause) + assert.Contains(t, resp.Summary, "credentials") + assert.Contains(t, resp.Recommendation, "API key") + assert.Equal(t, "open_provider_settings", resp.Action) + assert.NotContains(t, rec.Body.String(), "Ollama returned status 401") +} + +func TestAISettingsHandler_TestProvider_ModelUnavailableUsesSafeDiagnostic(t *testing.T) { + t.Parallel() + + ollama := newIPv4HTTPServer(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/version": + _ = json.NewEncoder(w).Encode(map[string]any{"version": "0.1.0"}) + case "/api/tags": + _ = json.NewEncoder(w).Encode(map[string]any{"models": []map[string]any{{"name": "llama3:latest"}}}) + default: + http.NotFound(w, r) + } + })) + defer ollama.Close() + + tmp := t.TempDir() + cfg := &config.Config{DataPath: tmp} + persistence := config.NewConfigPersistence(tmp) + + aiCfg := config.NewDefaultAIConfig() + aiCfg.Enabled = true + aiCfg.Model = "ollama:missing:latest" + aiCfg.OllamaBaseURL = ollama.URL + if err := persistence.SaveAIConfig(*aiCfg); err != nil { + t.Fatalf("SaveAIConfig: %v", err) + } + + handler := newTestAISettingsHandler(cfg, persistence, nil) + + req := newLoopbackRequest(http.MethodPost, "/api/ai/test/ollama", nil) + rec := httptest.NewRecorder() + handler.HandleTestProvider(rec, req) + + require.Equal(t, http.StatusOK, rec.Code) + + var resp struct { + Success bool `json:"success"` + Message string `json:"message"` + Provider string `json:"provider"` + Model string `json:"model"` + Cause string `json:"cause"` + Summary string `json:"summary"` + Recommendation string `json:"recommendation"` + Action string `json:"action"` + } + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp)) + assert.False(t, resp.Success) + assert.Equal(t, "Selected model unavailable", resp.Message) + assert.Equal(t, "ollama", resp.Provider) + assert.Equal(t, "ollama:missing:latest", resp.Model) + assert.Equal(t, string(ai.PatrolFailureCauseModelUnavailable), resp.Cause) + assert.Contains(t, resp.Summary, "configured Patrol model") + assert.Contains(t, resp.Recommendation, "choose one of the models") + assert.Equal(t, "open_provider_settings", resp.Action) + assert.NotContains(t, rec.Body.String(), "found: llama3:latest") } // ======================================== diff --git a/tests/integration/tests/52-ai-settings-provider-setup.spec.ts b/tests/integration/tests/52-ai-settings-provider-setup.spec.ts index 4f5c94f19..1f37d120a 100644 --- a/tests/integration/tests/52-ai-settings-provider-setup.spec.ts +++ b/tests/integration/tests/52-ai-settings-provider-setup.spec.ts @@ -295,4 +295,114 @@ test.describe("Assistant & Patrol settings provider setup", () => { page.getByText("Model: openrouter:deepseek/deepseek-r1"), ).toBeVisible(); }); + + test("settings save failure keeps provider preflight recommendation context", async ({ + page, + }, testInfo) => { + test.skip( + testInfo.project.name.startsWith("mobile-"), + "Desktop-only settings coverage", + ); + + await ensureAuthenticated(page); + + const settings: MockAISettings = { + ...baseSettings(), + enabled: true, + configured: true, + model: "openrouter:deepseek/deepseek-r1", + patrol_model: "openrouter:deepseek/deepseek-r1", + openrouter_configured: true, + configured_providers: ["openrouter"], + }; + let updateHits = 0; + + await page.route("**/api/settings/ai", async (route) => { + await route.fulfill({ + status: 200, + contentType: "application/json", + body: JSON.stringify(settings), + }); + }); + + await page.route("**/api/ai/models", async (route) => { + await route.fulfill({ + status: 200, + contentType: "application/json", + body: JSON.stringify({ + models: [ + { + id: "openrouter:deepseek/deepseek-r1", + name: "DeepSeek R1", + notable: true, + }, + ], + }), + }); + }); + + await page.route("**/api/ai/test/openrouter", async (route) => { + await route.fulfill({ + status: 200, + contentType: "application/json", + body: JSON.stringify({ + success: false, + message: "Provider authentication issue", + provider: "openrouter", + model: "openrouter:deepseek/deepseek-r1", + cause: "provider_auth", + summary: + "Pulse Patrol cannot analyze your infrastructure because the provider rejected the configured credentials or account access.", + recommendation: + "Check the API key or provider authentication in Patrol provider settings, then rerun Patrol.", + action: "open_provider_settings", + }), + }); + }); + + await page.route("**/api/ai/chat/sessions", async (route) => { + await route.fulfill({ + status: 200, + contentType: "application/json", + body: JSON.stringify([]), + }); + }); + + await page.route("**/api/settings/ai/update", async (route) => { + updateHits += 1; + await route.fulfill({ + status: 500, + contentType: "application/json", + body: JSON.stringify({ + error: "Unable to save Assistant & Patrol settings.", + }), + }); + }); + + await page.goto("/settings/system-ai", { waitUntil: "domcontentloaded" }); + await expect( + page.getByRole("heading", { name: "Assistant & Patrol", level: 1 }), + ).toBeVisible(); + await expect( + page.getByText("Provider authentication issue").first(), + ).toBeVisible(); + await expect( + page + .getByText( + "Check the API key or provider authentication in Patrol provider settings, then rerun Patrol.", + ) + .first(), + ).toBeVisible(); + + await page.getByRole("button", { name: /save changes/i }).click(); + + await expect.poll(() => updateHits).toBe(1); + const failureMessage = page.getByText( + /OpenRouter provider.*Provider authentication issue.*Unable to save Assistant & Patrol settings/i, + ); + await expect(failureMessage).toBeVisible(); + await expect(failureMessage).toContainText( + "Check the API key or provider authentication in Patrol provider settings", + ); + }); });