Harden external model prompt-secret sanitation

2026-05-19 16:27:37 +00:00 · 2026-05-13 13:02:29 +01:00 · 2026-05-13 13:02:29 +01:00 · 0fbcc90184
commit 0fbcc90184
parent 31331b5451
4 changed files with 405 additions and 23 deletions
--- a/internal/ai/modelboundary/prompt_secret_sanitizer_test.go
+++ b/internal/ai/modelboundary/prompt_secret_sanitizer_test.go
@ -0,0 +1,119 @@
+package modelboundary
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/rcourtman/pulse-go-rewrite/internal/ai/providers"
+)
+
+func TestRequestSanitizerForModelRedactsPromptSecretsWithoutResourcePolicy(t *testing.T) { // Checks that an external model gets a prompt-secret sanitizer even when the resource provider is nil.
+	sanitizer := RequestSanitizerForModel("anthropic:claude-3-5-sonnet", nil)
+	if sanitizer == nil {
+		t.Fatal("expected external model sanitizer without resource provider")
+	}
+
+	req := providers.ChatRequest{ // fixture with secrets in the system prompt, messages, tool calls, tool result, and tool schema
+		System: "Use this only if needed: password: system-password",
+		Messages: []providers.Message{
+			{
+				Role:             "user",
+				Content:          `Operator prompt includes {"api_key":"json-secret-value"}`,
+				ReasoningContent: "Authorization: Bearer sk-reasoning-secret",
+				ToolCalls: []providers.ToolCall{{
+					ID:   "tool-1",
+					Name: "pulse_report",
+					Input: map[string]interface{}{
+						"api_key": "plain-tool-key",
+						"credentials": map[string]interface{}{
+							"value":    "nested-credential-value",
+							"metadata": "safe metadata",
+						},
+						"safe": []interface{}{"keep-me", "sk-provider-token"},
+					},
+				}},
+			},
+			{
+				Role:       "tool",
+				ToolResult: &providers.ToolResult{ToolUseID: "tool-1", Content: "x-api-key: sk-tool-result-secret"},
+			},
+		},
+		Tools: []providers.Tool{{
+			Name:        "pulse_report",
+			Description: "Call report narrator with access_token=tool-description-token",
+			InputSchema: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"api_key": map[string]interface{}{
+						"type":        "string",
+						"description": "Provider API key",
+						"default":     "schema-default-secret",
+						"examples":    []interface{}{"schema-example-secret"},
+					},
+				},
+			},
+		}},
+	}
+
+	got := sanitizer(req)
+	combined := strings.Join([]string{ // free-text fields are joined so one substring scan covers them all
+		got.System,
+		got.Messages[0].Content,
+		got.Messages[0].ReasoningContent,
+		got.Messages[1].ToolResult.Content,
+		got.Tools[0].Description,
+	}, "\n")
+	for _, forbidden := range []string{ // secret substrings that must not survive in any free-text field
+		"system-password",
+		"json-secret-value",
+		"sk-reasoning-secret",
+		"sk-tool-result-secret",
+		"tool-description-token",
+	} {
+		if strings.Contains(combined, forbidden) {
+			t.Fatalf("sanitized request leaked %q:\n%s", forbidden, combined)
+		}
+	}
+
+	input := got.Messages[0].ToolCalls[0].Input // structured tool-call input is checked key by key
+	if input["api_key"] != "[REDACTED]" {
+		t.Fatalf("tool call api_key = %#v, want redacted marker", input["api_key"])
+	}
+	credentials := input["credentials"].(map[string]interface{})
+	if credentials["value"] != "[REDACTED]" {
+		t.Fatalf("nested credential value = %#v, want redacted marker", credentials["value"])
+	}
+	if credentials["metadata"] != "safe metadata" { // non-carrier keys under a sensitive parent keep their text
+		t.Fatalf("non-value credential metadata was changed: %#v", credentials["metadata"])
+	}
+	safeValues := input["safe"].([]interface{})
+	if safeValues[0] != "keep-me" {
+		t.Fatalf("safe non-secret value was changed: %#v", safeValues[0])
+	}
+	if strings.Contains(safeValues[1].(string), "sk-provider-token") { // token-shaped values are redacted even under a non-sensitive key
+		t.Fatalf("provider-shaped token in safe field was not redacted: %#v", safeValues[1])
+	}
+
+	properties := got.Tools[0].InputSchema["properties"].(map[string]interface{}) // schema: type/description must be kept, default/examples redacted
+	apiKeyProperty := properties["api_key"].(map[string]interface{})
+	if apiKeyProperty["type"] != "string" {
+		t.Fatalf("schema type was changed: %#v", apiKeyProperty["type"])
+	}
+	if apiKeyProperty["description"] != "Provider API key" {
+		t.Fatalf("schema description was changed: %#v", apiKeyProperty["description"])
+	}
+	if apiKeyProperty["default"] != "[REDACTED]" {
+		t.Fatalf("schema default = %#v, want redacted marker", apiKeyProperty["default"])
+	}
+	examples := apiKeyProperty["examples"].([]interface{})
+	if examples[0] != "[REDACTED]" {
+		t.Fatalf("schema example = %#v, want redacted marker", examples[0])
+	}
+
+	if req.Messages[0].ToolCalls[0].Input["api_key"] != "plain-tool-key" { // the caller's original request must not be mutated
+		t.Fatalf("sanitizer mutated original tool call input: %#v", req.Messages[0].ToolCalls[0].Input["api_key"])
+	}
+	if req.Tools[0].InputSchema["type"] != "object" {
+		t.Fatalf("sanitizer mutated original tool schema: %#v", req.Tools[0].InputSchema["type"])
+	}
+}
--- a/internal/ai/modelboundary/resource_policy_sanitizer.go
+++ b/internal/ai/modelboundary/resource_policy_sanitizer.go
@ -4,6 +4,7 @@ import (
 	"strings"

 	"github.com/rcourtman/pulse-go-rewrite/internal/ai/providers"
+	"github.com/rcourtman/pulse-go-rewrite/internal/ai/safety"
 	"github.com/rcourtman/pulse-go-rewrite/internal/config"
 	"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
 )
@ -20,17 +21,18 @@ type allUnifiedResourceProvider interface {

 // RequestSanitizerForModel returns a sanitizer for non-local model traffic.
 // It is intentionally applied at the final provider transport boundary so
-// later tool-result turns cannot bypass the resource-policy posture exported
-// to the operator-facing Data Handling surface.
+// operator-entered prompts, handoff text, tool-result turns, and provider-bound
+// tool schemas cannot bypass prompt-secret or resource-policy sanitization.
 func RequestSanitizerForModel(model string, provider UnifiedResourceProvider) func(providers.ChatRequest) providers.ChatRequest {
-	if !ModelUsesExternalProvider(model) || provider == nil {
+	if !ModelUsesExternalProvider(model) {
 		return nil
 	}
 	resources := resourcePolicySanitizerResources(provider)
-	if len(resources) == 0 {
-		return nil
-	}
 	return func(req providers.ChatRequest) providers.ChatRequest {
+		req = sanitizeProviderRequestForPromptSecrets(req) // prompt-secret redaction always runs, even when there are no policy resources
+		if len(resources) == 0 {
+			return req
+		}
 		return sanitizeProviderRequestForResources(req, resources)
 	}
 }
@ -191,3 +193,109 @@ func sanitizeResourcePolicyValue(value interface{}, resources []unifiedresources
 		return value
 	}
 }
+
+func sanitizeProviderRequestForPromptSecrets(req providers.ChatRequest) providers.ChatRequest { // Redacts secrets from the system prompt, every message, and every tool definition of req.
+	req.System = sanitizePromptSecretText(req.System)
+
+	if len(req.Messages) > 0 {
+		req.Messages = append([]providers.Message(nil), req.Messages...) // copy the slice so the caller's messages are not overwritten
+		for i := range req.Messages {
+			req.Messages[i] = sanitizeProviderMessageForPromptSecrets(req.Messages[i])
+		}
+	}
+	if len(req.Tools) > 0 {
+		req.Tools = append([]providers.Tool(nil), req.Tools...) // copy the slice so the caller's tools are not overwritten
+		for i := range req.Tools {
+			req.Tools[i] = sanitizeProviderToolForPromptSecrets(req.Tools[i])
+		}
+	}
+	return req
+}
+
+func sanitizeProviderMessageForPromptSecrets(msg providers.Message) providers.Message { // Redacts the content, reasoning content, tool result content, and tool call inputs of one message.
+	msg.Content = sanitizePromptSecretText(msg.Content)
+	msg.ReasoningContent = sanitizePromptSecretText(msg.ReasoningContent)
+	if msg.ToolResult != nil {
+		toolResult := *msg.ToolResult // work on a copy so the caller's ToolResult is left untouched
+		toolResult.Content = sanitizePromptSecretText(toolResult.Content)
+		msg.ToolResult = &toolResult
+	}
+	if len(msg.ToolCalls) > 0 {
+		msg.ToolCalls = append([]providers.ToolCall(nil), msg.ToolCalls...) // copy the slice before replacing each Input
+		for i := range msg.ToolCalls {
+			msg.ToolCalls[i].Input = sanitizePromptSecretMap(msg.ToolCalls[i].Input, false) // top-level input has no sensitive parent key
+		}
+	}
+	return msg
+}
+
+func sanitizeProviderToolForPromptSecrets(tool providers.Tool) providers.Tool { // Redacts a tool definition's description and input schema.
+	tool.Description = sanitizePromptSecretText(tool.Description)
+	tool.InputSchema = sanitizePromptSecretMap(tool.InputSchema, false) // the schema root has no sensitive parent key
+	return tool
+}
+
+func sanitizePromptSecretText(value string) string { // Runs free text through safety.RedactSensitiveText and returns only the redacted string.
+	redacted, _ := safety.RedactSensitiveText(value) // the redaction count is not needed here
+	return redacted
+}
+
+func sanitizePromptSecretSensitiveValue(value string) string { // Runs a value from a sensitive field through safety.RedactSensitiveValue and returns only the redacted string.
+	redacted, _ := safety.RedactSensitiveValue(value) // the redaction count is not needed here
+	return redacted
+}
+
+func sanitizePromptSecretMap(values map[string]interface{}, sensitiveParent bool) map[string]interface{} { // Returns a sanitized copy of values; sensitiveParent says whether the key holding this map was itself sensitive.
+	if len(values) == 0 { // nil and empty maps are returned as-is, without allocating
+		return values
+	}
+	sanitized := make(map[string]interface{}, len(values))
+	for key, value := range values {
+		keyIsSensitive := safety.IsSensitiveFieldName(key)
+		valueIsSensitive := keyIsSensitive || sensitiveParent && safety.IsSensitiveValueCarrierFieldName(key) // && binds tighter than ||: a sensitive key, or a value-carrier key under a sensitive parent
+		sanitized[key] = sanitizePromptSecretValue(key, value, valueIsSensitive)
+	}
+	return sanitized
+}
+
+func sanitizePromptSecretValue(fieldName string, value interface{}, sensitiveValue bool) interface{} { // Sanitizes one value by its dynamic type; sensitiveValue routes strings through sanitizePromptSecretSensitiveValue instead of sanitizePromptSecretText.
+	switch typed := value.(type) {
+	case string:
+		if sensitiveValue {
+			return sanitizePromptSecretSensitiveValue(typed)
+		}
+		return sanitizePromptSecretText(typed)
+	case []string:
+		out := make([]string, len(typed)) // new slice, so the input slice is not modified
+		for i := range typed {
+			if sensitiveValue {
+				out[i] = sanitizePromptSecretSensitiveValue(typed[i])
+				continue
+			}
+			out[i] = sanitizePromptSecretText(typed[i])
+		}
+		return out
+	case []interface{}:
+		out := make([]interface{}, len(typed))
+		for i := range typed {
+			out[i] = sanitizePromptSecretValue(fieldName, typed[i], sensitiveValue) // elements inherit the slice's sensitivity; fieldName is only threaded through this recursion
+		}
+		return out
+	case map[string]interface{}:
+		return sanitizePromptSecretMap(typed, sensitiveValue) // this value's sensitivity becomes the nested map's sensitiveParent
+	case map[string]string:
+		out := make(map[string]string, len(typed))
+		for key, nested := range typed {
+			keyIsSensitive := safety.IsSensitiveFieldName(key)
+			valueIsSensitive := keyIsSensitive || sensitiveValue && safety.IsSensitiveValueCarrierFieldName(key) // same rule as sanitizePromptSecretMap, applied to string-valued maps
+			if valueIsSensitive {
+				out[key] = sanitizePromptSecretSensitiveValue(nested)
+				continue
+			}
+			out[key] = sanitizePromptSecretText(nested)
+		}
+		return out
+	default: // any other dynamic type is returned unchanged
+		return value
+	}
+}
--- a/internal/ai/safety/redaction.go
+++ b/internal/ai/safety/redaction.go
@ -11,16 +11,26 @@ var (
 	pemEndRE   = regexp.MustCompile(`(?m)^-----END [A-Z0-9 ][A-Z0-9 ]+-----\s*$`)

 	// Common secret-bearing key/value patterns.
-	kvSecretRE = regexp.MustCompile(`(?i)\b(password|passwd|passphrase|secret|token|api[_-]?key|client[_-]?secret|private[_-]?key)\b\s*[:=]\s*(.+)$`)
+	kvSecretRE = regexp.MustCompile(`(?i)\b(password|passwd|passphrase|secret|token|api[_-]?key|client[_-]?secret|private[_-]?key|access[_-]?token|refresh[_-]?token)\b\s*[:=]\s*(.+)$`)

-	// Authorization bearer header.
-	bearerRE = regexp.MustCompile(`(?i)\bauthorization\s*:\s*bearer\s+([A-Za-z0-9\-._~+/]+=*)`)
+	// Structured, URL, and header forms that commonly appear in prompts,
+	// provider errors, tool schemas, and operator-entered handoff context.
+	quotedKVSecretRE = regexp.MustCompile(`(?i)("(?:password|passwd|passphrase|secret|token|api[_-]?key|apikey|client[_-]?secret|private[_-]?key|access[_-]?token|refresh[_-]?token|authorization|x-api-key|credential)"\s*:\s*")[^"]+`)
+	querySecretRE    = regexp.MustCompile(`(?i)([?&](?:key|api[_-]?key|apikey|access[_-]?token|refresh[_-]?token|token|client[_-]?secret|secret)=)[^\s&"']+`)
+	bearerRE         = regexp.MustCompile(`(?i)(\bauthorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`)
+	xAPIKeyHeaderRE  = regexp.MustCompile(`(?i)(\bx-api-key\s*:\s*)[^\s,;]+`)
+	urlUserInfoRE    = regexp.MustCompile(`(?i)(https?://)[^\s/@:]+:[^\s/@]+@`)

 	// Common token formats to reduce accidental leakage even when not in k=v form.
 	awsAccessKeyRE = regexp.MustCompile(`\b(AKIA|ASIA)[0-9A-Z]{16}\b`)
 	jwtRE          = regexp.MustCompile(`\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b`)
+	openAIKeyRE    = regexp.MustCompile(`\bsk-[A-Za-z0-9_-]{8,}\b`)
+	googleAPIKeyRE = regexp.MustCompile(`\bAIza[0-9A-Za-z_-]{10,}\b`)
+	githubTokenRE  = regexp.MustCompile(`\bgh[opsur]_[A-Za-z0-9_]{10,}\b`)
 )

+const redactedSecretValue = "[REDACTED]" // marker substituted for generic redacted secret values
+
 // RedactSensitiveText removes likely-secret material from text outputs to reduce accidental
 // key/token leakage through AI tool results. It is intentionally conservative: if a value
 // looks sensitive, it will be replaced.
@ -62,20 +72,7 @@ func RedactSensitiveText(input string) (string, int) {
 			}
 		}

-		if bearerRE.MatchString(line) {
-			lines[i] = bearerRE.ReplaceAllString(line, "Authorization: Bearer [REDACTED]")
-			redactions++
-			continue
-		}
-
-		if awsAccessKeyRE.MatchString(line) {
-			lines[i] = awsAccessKeyRE.ReplaceAllString(line, "[REDACTED_AWS_ACCESS_KEY]")
-			redactions++
-		}
-		if jwtRE.MatchString(lines[i]) {
-			lines[i] = jwtRE.ReplaceAllString(lines[i], "[REDACTED_JWT]")
-			redactions++
-		}
+		lines[i], redactions = redactLineSecretPatterns(lines[i], redactions)
 	}

 	// Drop empty lines introduced by PEM redaction.
@ -88,3 +85,98 @@ func RedactSensitiveText(input string) (string, int) {
 	}
 	return strings.Join(outLines, "\n"), redactions
 }
+
+func redactLineSecretPatterns(line string, redactions int) (string, int) { // Runs line through each single-line secret pattern in order and returns the rewritten line plus redactions increased by every match.
+	var count int
+	line, count = replaceAllCounting(quotedKVSecretRE, line, `${1}`+redactedSecretValue) // ${1} keeps the captured prefix; only what follows it is replaced
+	redactions += count
+	line, count = replaceAllCounting(querySecretRE, line, `${1}`+redactedSecretValue)
+	redactions += count
+	line, count = replaceAllCounting(bearerRE, line, `${1}`+redactedSecretValue)
+	redactions += count
+	line, count = replaceAllCounting(xAPIKeyHeaderRE, line, `${1}`+redactedSecretValue)
+	redactions += count
+	line, count = replaceAllCounting(urlUserInfoRE, line, `${1}`+redactedSecretValue+"@") // the pattern consumes the trailing "@", so it is written back
+	redactions += count
+	line, count = replaceAllCounting(awsAccessKeyRE, line, "[REDACTED_AWS_ACCESS_KEY]") // token-shape patterns below replace the whole match with a typed marker
+	redactions += count
+	line, count = replaceAllCounting(jwtRE, line, "[REDACTED_JWT]")
+	redactions += count
+	line, count = replaceAllCounting(openAIKeyRE, line, "[REDACTED_PROVIDER_KEY]")
+	redactions += count
+	line, count = replaceAllCounting(googleAPIKeyRE, line, "[REDACTED_PROVIDER_KEY]")
+	redactions += count
+	line, count = replaceAllCounting(githubTokenRE, line, "[REDACTED_PROVIDER_TOKEN]")
+	redactions += count
+	return line, redactions
+}
+
+func replaceAllCounting(re *regexp.Regexp, input string, replacement string) (string, int) { // Replaces every match of re in input with replacement and returns the result together with the number of matches.
+	matches := re.FindAllStringIndex(input, -1)
+	if len(matches) == 0 { // no match: return input untouched with a zero count
+		return input, 0
+	}
+	return re.ReplaceAllString(input, replacement), len(matches)
+}
+
+// RedactSensitiveFieldValue redacts text with key context. It catches bland
+// values that are only secret-shaped because the surrounding field says so,
+// such as a tool-call input named api_key. It returns the redacted text and a redaction count.
+func RedactSensitiveFieldValue(fieldName string, input string) (string, int) {
+	if !IsSensitiveFieldName(fieldName) { // non-sensitive field: fall back to pattern-based text redaction
+		return RedactSensitiveText(input)
+	}
+	return RedactSensitiveValue(input)
+}
+
+// RedactSensitiveValue redacts a structured value that the caller already
+// knows belongs to a sensitive field or schema value slot.
+func RedactSensitiveValue(input string) (string, int) {
+	redacted, count := RedactSensitiveText(input) // pattern-based pass first, so its redactions are counted
+	if strings.TrimSpace(redacted) == "" { // blank result: returned as-is, no marker is added
+		return redacted, count
+	}
+	if redacted == redactedSecretValue { // already exactly the marker: return without adding to count
+		return redacted, count
+	}
+	return redactedSecretValue, count + 1 // anything else is replaced wholesale by the marker and counted once more
+}
+
+// IsSensitiveFieldName reports whether a structured key usually carries a
+// credential or secret-bearing value.
+func IsSensitiveFieldName(name string) bool {
+	normalized := normalizedFieldName(name) // lowercased with only a-z and 0-9 kept, so api_key, API-Key and apiKey all become "apikey"
+	if normalized == "" {
+		return false
+	}
+	switch normalized { // exact match against the normalized name; substrings do not count
+	case "password", "passwd", "passphrase", "secret", "token", "apikey", "clientsecret",
+		"privatekey", "accesstoken", "refreshtoken", "authorization", "xapikey", "credential",
+		"credentials":
+		return true
+	default:
+		return false
+	}
+}
+
+// IsSensitiveValueCarrierFieldName reports whether a nested field commonly
+// carries an example, default, or literal value for a sensitive schema property.
+func IsSensitiveValueCarrierFieldName(name string) bool {
+	normalized := normalizedFieldName(name) // same normalization as IsSensitiveFieldName
+	switch normalized { // exact match against the normalized name
+	case "value", "values", "default", "defaults", "example", "examples", "enum", "const":
+		return true
+	default:
+		return false
+	}
+}
+
+func normalizedFieldName(name string) string { // Trims and lowercases name, then keeps only the ASCII letters a-z and digits 0-9.
+	var b strings.Builder
+	for _, r := range strings.ToLower(strings.TrimSpace(name)) {
+		if r >= 'a' && r <= 'z' || r >= '0' && r <= '9' { // separators such as '_', '-', and spaces are dropped
+			b.WriteRune(r)
+		}
+	}
+	return b.String()
+}
--- a/internal/ai/safety/redaction_test.go
+++ b/internal/ai/safety/redaction_test.go
@ -0,0 +1,63 @@
+package safety
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestRedactSensitiveTextCoversStructuredPromptSecrets(t *testing.T) { // Checks RedactSensitiveText against key/value, JSON, query-string, header, URL user-info, and GitHub-token inputs.
+	input := strings.Join([]string{ // one secret form per line
+		`password: hunter2`,
+		`{"api_key":"plain-json-secret","access_token":"access-token-value"}`,
+		`GET https://example.test/v1?key=AIzaSySecretTokenValue&region=us`,
+		`Authorization: Bearer sk-live-secret-token`,
+		`x-api-key: sk-provider-secret`,
+		`https://operator:password@example.test/v1`,
+		`github=ghp_abcdefghijklmnopqrstuvwxyz`,
+	}, "\n")
+
+	redacted, count := RedactSensitiveText(input)
+	if count == 0 {
+		t.Fatal("expected redactions")
+	}
+	for _, forbidden := range []string{ // secret substrings that must be gone from the output
+		"hunter2",
+		"plain-json-secret",
+		"access-token-value",
+		"AIzaSySecretTokenValue",
+		"sk-live-secret-token",
+		"sk-provider-secret",
+		"operator:password@",
+		"ghp_abcdefghijklmnopqrstuvwxyz",
+	} {
+		if strings.Contains(redacted, forbidden) {
+			t.Fatalf("redacted text leaked %q:\n%s", forbidden, redacted)
+		}
+	}
+	for _, retained := range []string{ // surrounding context that must remain so the output stays readable
+		"password:",
+		`"api_key":"[REDACTED]"`,
+		"Authorization: Bearer [REDACTED]",
+		"x-api-key: [REDACTED]",
+		"https://[REDACTED]@example.test/v1",
+	} {
+		if !strings.Contains(redacted, retained) {
+			t.Fatalf("redacted text missing retained context %q:\n%s", retained, redacted)
+		}
+	}
+}
+
+func TestRedactSensitiveFieldValueUsesKeyContext(t *testing.T) { // Checks that the same bland value is redacted under a sensitive field name and left alone under a neutral one.
+	redacted, count := RedactSensitiveFieldValue("client_secret", "plain-value-without-token-shape")
+	if redacted != "[REDACTED]" {
+		t.Fatalf("redacted value = %q, want marker", redacted)
+	}
+	if count == 0 {
+		t.Fatal("expected key-context redaction count")
+	}
+
+	unchanged, count := RedactSensitiveFieldValue("display_name", "plain-value-without-token-shape") // neutral field name: value has no secret shape, so nothing should change
+	if unchanged != "plain-value-without-token-shape" || count != 0 {
+		t.Fatalf("non-sensitive field redaction = %q count=%d", unchanged, count)
+	}
+}