From 0fbcc901843117fa3da68ae3d920e950197393bb Mon Sep 17 00:00:00 2001 From: rcourtman Date: Wed, 13 May 2026 13:02:29 +0100 Subject: [PATCH] Harden external model prompt-secret sanitation --- .../prompt_secret_sanitizer_test.go | 119 +++++++++++++++++ .../resource_policy_sanitizer.go | 120 ++++++++++++++++- internal/ai/safety/redaction.go | 126 +++++++++++++++--- internal/ai/safety/redaction_test.go | 63 +++++++++ 4 files changed, 405 insertions(+), 23 deletions(-) create mode 100644 internal/ai/modelboundary/prompt_secret_sanitizer_test.go create mode 100644 internal/ai/safety/redaction_test.go diff --git a/internal/ai/modelboundary/prompt_secret_sanitizer_test.go b/internal/ai/modelboundary/prompt_secret_sanitizer_test.go new file mode 100644 index 000000000..694ed049e --- /dev/null +++ b/internal/ai/modelboundary/prompt_secret_sanitizer_test.go @@ -0,0 +1,119 @@ +package modelboundary + +import ( + "strings" + "testing" + + "github.com/rcourtman/pulse-go-rewrite/internal/ai/providers" +) + +func TestRequestSanitizerForModelRedactsPromptSecretsWithoutResourcePolicy(t *testing.T) { + sanitizer := RequestSanitizerForModel("anthropic:claude-3-5-sonnet", nil) + if sanitizer == nil { + t.Fatal("expected external model sanitizer without resource provider") + } + + req := providers.ChatRequest{ + System: "Use this only if needed: password: system-password", + Messages: []providers.Message{ + { + Role: "user", + Content: `Operator prompt includes {"api_key":"json-secret-value"}`, + ReasoningContent: "Authorization: Bearer sk-reasoning-secret", + ToolCalls: []providers.ToolCall{{ + ID: "tool-1", + Name: "pulse_report", + Input: map[string]interface{}{ + "api_key": "plain-tool-key", + "credentials": map[string]interface{}{ + "value": "nested-credential-value", + "metadata": "safe metadata", + }, + "safe": []interface{}{"keep-me", "sk-provider-token"}, + }, + }}, + }, + { + Role: "tool", + ToolResult: &providers.ToolResult{ToolUseID: "tool-1", Content: "x-api-key: 
sk-tool-result-secret"}, + }, + }, + Tools: []providers.Tool{{ + Name: "pulse_report", + Description: "Call report narrator with access_token=tool-description-token", + InputSchema: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "api_key": map[string]interface{}{ + "type": "string", + "description": "Provider API key", + "default": "schema-default-secret", + "examples": []interface{}{"schema-example-secret"}, + }, + }, + }, + }}, + } + + got := sanitizer(req) + combined := strings.Join([]string{ + got.System, + got.Messages[0].Content, + got.Messages[0].ReasoningContent, + got.Messages[1].ToolResult.Content, + got.Tools[0].Description, + }, "\n") + for _, forbidden := range []string{ + "system-password", + "json-secret-value", + "sk-reasoning-secret", + "sk-tool-result-secret", + "tool-description-token", + } { + if strings.Contains(combined, forbidden) { + t.Fatalf("sanitized request leaked %q:\n%s", forbidden, combined) + } + } + + input := got.Messages[0].ToolCalls[0].Input + if input["api_key"] != "[REDACTED]" { + t.Fatalf("tool call api_key = %#v, want redacted marker", input["api_key"]) + } + credentials := input["credentials"].(map[string]interface{}) + if credentials["value"] != "[REDACTED]" { + t.Fatalf("nested credential value = %#v, want redacted marker", credentials["value"]) + } + if credentials["metadata"] != "safe metadata" { + t.Fatalf("non-value credential metadata was changed: %#v", credentials["metadata"]) + } + safeValues := input["safe"].([]interface{}) + if safeValues[0] != "keep-me" { + t.Fatalf("safe non-secret value was changed: %#v", safeValues[0]) + } + if strings.Contains(safeValues[1].(string), "sk-provider-token") { + t.Fatalf("provider-shaped token in safe field was not redacted: %#v", safeValues[1]) + } + + properties := got.Tools[0].InputSchema["properties"].(map[string]interface{}) + apiKeyProperty := properties["api_key"].(map[string]interface{}) + if apiKeyProperty["type"] != "string" { + 
t.Fatalf("schema type was changed: %#v", apiKeyProperty["type"]) + } + if apiKeyProperty["description"] != "Provider API key" { + t.Fatalf("schema description was changed: %#v", apiKeyProperty["description"]) + } + if apiKeyProperty["default"] != "[REDACTED]" { + t.Fatalf("schema default = %#v, want redacted marker", apiKeyProperty["default"]) + } + examples := apiKeyProperty["examples"].([]interface{}) + if examples[0] != "[REDACTED]" { + t.Fatalf("schema example = %#v, want redacted marker", examples[0]) + } + + if req.Messages[0].ToolCalls[0].Input["api_key"] != "plain-tool-key" { + t.Fatalf("sanitizer mutated original tool call input: %#v", req.Messages[0].ToolCalls[0].Input["api_key"]) + } + if req.Tools[0].InputSchema["type"] != "object" { + t.Fatalf("sanitizer mutated original tool schema: %#v", req.Tools[0].InputSchema["type"]) + } +} diff --git a/internal/ai/modelboundary/resource_policy_sanitizer.go b/internal/ai/modelboundary/resource_policy_sanitizer.go index 9e7be33b6..f039d8ef4 100644 --- a/internal/ai/modelboundary/resource_policy_sanitizer.go +++ b/internal/ai/modelboundary/resource_policy_sanitizer.go @@ -4,6 +4,7 @@ import ( "strings" "github.com/rcourtman/pulse-go-rewrite/internal/ai/providers" + "github.com/rcourtman/pulse-go-rewrite/internal/ai/safety" "github.com/rcourtman/pulse-go-rewrite/internal/config" "github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources" ) @@ -20,17 +21,18 @@ type allUnifiedResourceProvider interface { // RequestSanitizerForModel returns a sanitizer for non-local model traffic. // It is intentionally applied at the final provider transport boundary so -// later tool-result turns cannot bypass the resource-policy posture exported -// to the operator-facing Data Handling surface. +// operator-entered prompts, handoff text, tool-result turns, and provider-bound +// tool schemas cannot bypass prompt-secret or resource-policy sanitation. 
func RequestSanitizerForModel(model string, provider UnifiedResourceProvider) func(providers.ChatRequest) providers.ChatRequest { - if !ModelUsesExternalProvider(model) || provider == nil { + if !ModelUsesExternalProvider(model) { return nil } resources := resourcePolicySanitizerResources(provider) - if len(resources) == 0 { - return nil - } return func(req providers.ChatRequest) providers.ChatRequest { + req = sanitizeProviderRequestForPromptSecrets(req) + if len(resources) == 0 { + return req + } return sanitizeProviderRequestForResources(req, resources) } } @@ -191,3 +193,109 @@ func sanitizeResourcePolicyValue(value interface{}, resources []unifiedresources return value } } + +func sanitizeProviderRequestForPromptSecrets(req providers.ChatRequest) providers.ChatRequest { + req.System = sanitizePromptSecretText(req.System) + + if len(req.Messages) > 0 { + req.Messages = append([]providers.Message(nil), req.Messages...) + for i := range req.Messages { + req.Messages[i] = sanitizeProviderMessageForPromptSecrets(req.Messages[i]) + } + } + if len(req.Tools) > 0 { + req.Tools = append([]providers.Tool(nil), req.Tools...) + for i := range req.Tools { + req.Tools[i] = sanitizeProviderToolForPromptSecrets(req.Tools[i]) + } + } + return req +} + +func sanitizeProviderMessageForPromptSecrets(msg providers.Message) providers.Message { + msg.Content = sanitizePromptSecretText(msg.Content) + msg.ReasoningContent = sanitizePromptSecretText(msg.ReasoningContent) + if msg.ToolResult != nil { + toolResult := *msg.ToolResult + toolResult.Content = sanitizePromptSecretText(toolResult.Content) + msg.ToolResult = &toolResult + } + if len(msg.ToolCalls) > 0 { + msg.ToolCalls = append([]providers.ToolCall(nil), msg.ToolCalls...) 
+ for i := range msg.ToolCalls { + msg.ToolCalls[i].Input = sanitizePromptSecretMap(msg.ToolCalls[i].Input, false) + } + } + return msg +} + +func sanitizeProviderToolForPromptSecrets(tool providers.Tool) providers.Tool { + tool.Description = sanitizePromptSecretText(tool.Description) + tool.InputSchema = sanitizePromptSecretMap(tool.InputSchema, false) + return tool +} + +func sanitizePromptSecretText(value string) string { + redacted, _ := safety.RedactSensitiveText(value) + return redacted +} + +func sanitizePromptSecretSensitiveValue(value string) string { + redacted, _ := safety.RedactSensitiveValue(value) + return redacted +} + +func sanitizePromptSecretMap(values map[string]interface{}, sensitiveParent bool) map[string]interface{} { + if len(values) == 0 { + return values + } + sanitized := make(map[string]interface{}, len(values)) + for key, value := range values { + keyIsSensitive := safety.IsSensitiveFieldName(key) + valueIsSensitive := keyIsSensitive || sensitiveParent && safety.IsSensitiveValueCarrierFieldName(key) + sanitized[key] = sanitizePromptSecretValue(key, value, valueIsSensitive) + } + return sanitized +} + +func sanitizePromptSecretValue(fieldName string, value interface{}, sensitiveValue bool) interface{} { + switch typed := value.(type) { + case string: + if sensitiveValue { + return sanitizePromptSecretSensitiveValue(typed) + } + return sanitizePromptSecretText(typed) + case []string: + out := make([]string, len(typed)) + for i := range typed { + if sensitiveValue { + out[i] = sanitizePromptSecretSensitiveValue(typed[i]) + continue + } + out[i] = sanitizePromptSecretText(typed[i]) + } + return out + case []interface{}: + out := make([]interface{}, len(typed)) + for i := range typed { + out[i] = sanitizePromptSecretValue(fieldName, typed[i], sensitiveValue) + } + return out + case map[string]interface{}: + return sanitizePromptSecretMap(typed, sensitiveValue) + case map[string]string: + out := make(map[string]string, len(typed)) + for key, 
nested := range typed { + keyIsSensitive := safety.IsSensitiveFieldName(key) + valueIsSensitive := keyIsSensitive || sensitiveValue && safety.IsSensitiveValueCarrierFieldName(key) + if valueIsSensitive { + out[key] = sanitizePromptSecretSensitiveValue(nested) + continue + } + out[key] = sanitizePromptSecretText(nested) + } + return out + default: + return value + } +} diff --git a/internal/ai/safety/redaction.go b/internal/ai/safety/redaction.go index 79b28d155..26bd551dc 100644 --- a/internal/ai/safety/redaction.go +++ b/internal/ai/safety/redaction.go @@ -11,16 +11,26 @@ var ( pemEndRE = regexp.MustCompile(`(?m)^-----END [A-Z0-9 ][A-Z0-9 ]+-----\s*$`) // Common secret-bearing key/value patterns. - kvSecretRE = regexp.MustCompile(`(?i)\b(password|passwd|passphrase|secret|token|api[_-]?key|client[_-]?secret|private[_-]?key)\b\s*[:=]\s*(.+)$`) + kvSecretRE = regexp.MustCompile(`(?i)\b(password|passwd|passphrase|secret|token|api[_-]?key|client[_-]?secret|private[_-]?key|access[_-]?token|refresh[_-]?token)\b\s*[:=]\s*(.+)$`) - // Authorization bearer header. - bearerRE = regexp.MustCompile(`(?i)\bauthorization\s*:\s*bearer\s+([A-Za-z0-9\-._~+/]+=*)`) + // Structured, URL, and header forms that commonly appear in prompts, + // provider errors, tool schemas, and operator-entered handoff context. 
+ quotedKVSecretRE = regexp.MustCompile(`(?i)("(?:password|passwd|passphrase|secret|token|api[_-]?key|apikey|client[_-]?secret|private[_-]?key|access[_-]?token|refresh[_-]?token|authorization|x-api-key|credential)"\s*:\s*")[^"]+`) + querySecretRE = regexp.MustCompile(`(?i)([?&](?:key|api[_-]?key|apikey|access[_-]?token|refresh[_-]?token|token|client[_-]?secret|secret)=)[^\s&"']+`) + bearerRE = regexp.MustCompile(`(?i)(\bauthorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`) + xAPIKeyHeaderRE = regexp.MustCompile(`(?i)(\bx-api-key\s*:\s*)[^\s,;]+`) + urlUserInfoRE = regexp.MustCompile(`(?i)(https?://)[^\s/@:]+:[^\s/@]+@`) // Common token formats to reduce accidental leakage even when not in k=v form. awsAccessKeyRE = regexp.MustCompile(`\b(AKIA|ASIA)[0-9A-Z]{16}\b`) jwtRE = regexp.MustCompile(`\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b`) + openAIKeyRE = regexp.MustCompile(`\bsk-[A-Za-z0-9_-]{8,}\b`) + googleAPIKeyRE = regexp.MustCompile(`\bAIza[0-9A-Za-z_-]{10,}\b`) + githubTokenRE = regexp.MustCompile(`\bgh[opsur]_[A-Za-z0-9_]{10,}\b`) ) +const redactedSecretValue = "[REDACTED]" + // RedactSensitiveText removes likely-secret material from text outputs to reduce accidental // key/token leakage through AI tool results. It is intentionally conservative: if a value // looks sensitive, it will be replaced. @@ -62,20 +72,7 @@ func RedactSensitiveText(input string) (string, int) { } } - if bearerRE.MatchString(line) { - lines[i] = bearerRE.ReplaceAllString(line, "Authorization: Bearer [REDACTED]") - redactions++ - continue - } - - if awsAccessKeyRE.MatchString(line) { - lines[i] = awsAccessKeyRE.ReplaceAllString(line, "[REDACTED_AWS_ACCESS_KEY]") - redactions++ - } - if jwtRE.MatchString(lines[i]) { - lines[i] = jwtRE.ReplaceAllString(lines[i], "[REDACTED_JWT]") - redactions++ - } + lines[i], redactions = redactLineSecretPatterns(lines[i], redactions) } // Drop empty lines introduced by PEM redaction. 
@@ -88,3 +85,98 @@ func RedactSensitiveText(input string) (string, int) { } return strings.Join(outLines, "\n"), redactions } + +func redactLineSecretPatterns(line string, redactions int) (string, int) { + var count int + line, count = replaceAllCounting(quotedKVSecretRE, line, `${1}`+redactedSecretValue) + redactions += count + line, count = replaceAllCounting(querySecretRE, line, `${1}`+redactedSecretValue) + redactions += count + line, count = replaceAllCounting(bearerRE, line, `${1}`+redactedSecretValue) + redactions += count + line, count = replaceAllCounting(xAPIKeyHeaderRE, line, `${1}`+redactedSecretValue) + redactions += count + line, count = replaceAllCounting(urlUserInfoRE, line, `${1}`+redactedSecretValue+"@") + redactions += count + line, count = replaceAllCounting(awsAccessKeyRE, line, "[REDACTED_AWS_ACCESS_KEY]") + redactions += count + line, count = replaceAllCounting(jwtRE, line, "[REDACTED_JWT]") + redactions += count + line, count = replaceAllCounting(openAIKeyRE, line, "[REDACTED_PROVIDER_KEY]") + redactions += count + line, count = replaceAllCounting(googleAPIKeyRE, line, "[REDACTED_PROVIDER_KEY]") + redactions += count + line, count = replaceAllCounting(githubTokenRE, line, "[REDACTED_PROVIDER_TOKEN]") + redactions += count + return line, redactions +} + +func replaceAllCounting(re *regexp.Regexp, input string, replacement string) (string, int) { + matches := re.FindAllStringIndex(input, -1) + if len(matches) == 0 { + return input, 0 + } + return re.ReplaceAllString(input, replacement), len(matches) +} + +// RedactSensitiveFieldValue redacts text with key context. It catches bland +// values that are only secret-shaped because the surrounding field says so, +// such as a tool-call input named api_key. 
+func RedactSensitiveFieldValue(fieldName string, input string) (string, int) { + if !IsSensitiveFieldName(fieldName) { + return RedactSensitiveText(input) + } + return RedactSensitiveValue(input) +} + +// RedactSensitiveValue redacts a structured value that the caller already +// knows belongs to a sensitive field or schema value slot. +func RedactSensitiveValue(input string) (string, int) { + redacted, count := RedactSensitiveText(input) + if strings.TrimSpace(redacted) == "" { + return redacted, count + } + if redacted == redactedSecretValue { + return redacted, count + } + return redactedSecretValue, count + 1 +} + +// IsSensitiveFieldName reports whether a structured key usually carries a +// credential or secret-bearing value. +func IsSensitiveFieldName(name string) bool { + normalized := normalizedFieldName(name) + if normalized == "" { + return false + } + switch normalized { + case "password", "passwd", "passphrase", "secret", "token", "apikey", "clientsecret", + "privatekey", "accesstoken", "refreshtoken", "authorization", "xapikey", "credential", + "credentials": + return true + default: + return false + } +} + +// IsSensitiveValueCarrierFieldName reports whether a nested field commonly +// carries an example, default, or literal value for a sensitive schema property. 
+func IsSensitiveValueCarrierFieldName(name string) bool { + normalized := normalizedFieldName(name) + switch normalized { + case "value", "values", "default", "defaults", "example", "examples", "enum", "const": + return true + default: + return false + } +} + +func normalizedFieldName(name string) string { + var b strings.Builder + for _, r := range strings.ToLower(strings.TrimSpace(name)) { + if r >= 'a' && r <= 'z' || r >= '0' && r <= '9' { + b.WriteRune(r) + } + } + return b.String() +} diff --git a/internal/ai/safety/redaction_test.go b/internal/ai/safety/redaction_test.go new file mode 100644 index 000000000..1b2c2a62c --- /dev/null +++ b/internal/ai/safety/redaction_test.go @@ -0,0 +1,63 @@ +package safety + +import ( + "strings" + "testing" +) + +func TestRedactSensitiveTextCoversStructuredPromptSecrets(t *testing.T) { + input := strings.Join([]string{ + `password: hunter2`, + `{"api_key":"plain-json-secret","access_token":"access-token-value"}`, + `GET https://example.test/v1?key=AIzaSySecretTokenValue&region=us`, + `Authorization: Bearer sk-live-secret-token`, + `x-api-key: sk-provider-secret`, + `https://operator:password@example.test/v1`, + `github=ghp_abcdefghijklmnopqrstuvwxyz`, + }, "\n") + + redacted, count := RedactSensitiveText(input) + if count == 0 { + t.Fatal("expected redactions") + } + for _, forbidden := range []string{ + "hunter2", + "plain-json-secret", + "access-token-value", + "AIzaSySecretTokenValue", + "sk-live-secret-token", + "sk-provider-secret", + "operator:password@", + "ghp_abcdefghijklmnopqrstuvwxyz", + } { + if strings.Contains(redacted, forbidden) { + t.Fatalf("redacted text leaked %q:\n%s", forbidden, redacted) + } + } + for _, retained := range []string{ + "password:", + `"api_key":"[REDACTED]"`, + "Authorization: Bearer [REDACTED]", + "x-api-key: [REDACTED]", + "https://[REDACTED]@example.test/v1", + } { + if !strings.Contains(redacted, retained) { + t.Fatalf("redacted text missing retained context %q:\n%s", retained, 
redacted) + } + } +} + +func TestRedactSensitiveFieldValueUsesKeyContext(t *testing.T) { + redacted, count := RedactSensitiveFieldValue("client_secret", "plain-value-without-token-shape") + if redacted != "[REDACTED]" { + t.Fatalf("redacted value = %q, want marker", redacted) + } + if count == 0 { + t.Fatal("expected key-context redaction count") + } + + unchanged, count := RedactSensitiveFieldValue("display_name", "plain-value-without-token-shape") + if unchanged != "plain-value-without-token-shape" || count != 0 { + t.Fatalf("non-sensitive field redaction = %q count=%d", unchanged, count) + } +}