Harden external model prompt-secret sanitization

This commit is contained in:
rcourtman 2026-05-13 13:02:29 +01:00
parent 31331b5451
commit 0fbcc90184
4 changed files with 405 additions and 23 deletions

View file

@ -0,0 +1,119 @@
package modelboundary
import (
"strings"
"testing"
"github.com/rcourtman/pulse-go-rewrite/internal/ai/providers"
)
// TestRequestSanitizerForModelRedactsPromptSecretsWithoutResourcePolicy checks
// that an external-provider model gets a sanitizer even when no resource
// provider is supplied, that the sanitizer redacts secrets from the system
// prompt, message content, reasoning content, tool results, tool-call inputs,
// tool descriptions and tool schemas, and that the caller's request is left
// untouched.
func TestRequestSanitizerForModelRedactsPromptSecretsWithoutResourcePolicy(t *testing.T) {
	sanitizer := RequestSanitizerForModel("anthropic:claude-3-5-sonnet", nil)
	if sanitizer == nil {
		t.Fatal("expected external model sanitizer without resource provider")
	}

	// asMap and asSlice turn a failed type assertion (or a too-short slice)
	// into a test failure with context instead of a panic.
	asMap := func(label string, v interface{}) map[string]interface{} {
		t.Helper()
		m, ok := v.(map[string]interface{})
		if !ok {
			t.Fatalf("%s = %#v, want map[string]interface{}", label, v)
		}
		return m
	}
	asSlice := func(label string, v interface{}, minLen int) []interface{} {
		t.Helper()
		s, ok := v.([]interface{})
		if !ok || len(s) < minLen {
			t.Fatalf("%s = %#v, want []interface{} with at least %d element(s)", label, v, minLen)
		}
		return s
	}

	req := providers.ChatRequest{
		System: "Use this only if needed: password: system-password",
		Messages: []providers.Message{
			{
				Role:             "user",
				Content:          `Operator prompt includes {"api_key":"json-secret-value"}`,
				ReasoningContent: "Authorization: Bearer sk-reasoning-secret",
				ToolCalls: []providers.ToolCall{{
					ID:   "tool-1",
					Name: "pulse_report",
					Input: map[string]interface{}{
						"api_key": "plain-tool-key",
						"credentials": map[string]interface{}{
							"value":    "nested-credential-value",
							"metadata": "safe metadata",
						},
						"safe": []interface{}{"keep-me", "sk-provider-token"},
					},
				}},
			},
			{
				Role:       "tool",
				ToolResult: &providers.ToolResult{ToolUseID: "tool-1", Content: "x-api-key: sk-tool-result-secret"},
			},
		},
		Tools: []providers.Tool{{
			Name:        "pulse_report",
			Description: "Call report narrator with access_token=tool-description-token",
			InputSchema: map[string]interface{}{
				"type": "object",
				"properties": map[string]interface{}{
					"api_key": map[string]interface{}{
						"type":        "string",
						"description": "Provider API key",
						"default":     "schema-default-secret",
						"examples":    []interface{}{"schema-example-secret"},
					},
				},
			},
		}},
	}

	got := sanitizer(req)

	// Verify the shape before indexing so a structural regression is reported
	// as a failure rather than an index-out-of-range or nil-pointer panic.
	if len(got.Messages) != 2 || len(got.Tools) != 1 {
		t.Fatalf("sanitized request has %d messages and %d tools, want 2 and 1", len(got.Messages), len(got.Tools))
	}
	if len(got.Messages[0].ToolCalls) != 1 {
		t.Fatalf("sanitized message has %d tool calls, want 1", len(got.Messages[0].ToolCalls))
	}
	if got.Messages[1].ToolResult == nil {
		t.Fatal("sanitized tool message lost its tool result")
	}

	// Free-text fields: none of the embedded secrets may survive.
	combined := strings.Join([]string{
		got.System,
		got.Messages[0].Content,
		got.Messages[0].ReasoningContent,
		got.Messages[1].ToolResult.Content,
		got.Tools[0].Description,
	}, "\n")
	for _, forbidden := range []string{
		"system-password",
		"json-secret-value",
		"sk-reasoning-secret",
		"sk-tool-result-secret",
		"tool-description-token",
	} {
		if strings.Contains(combined, forbidden) {
			t.Fatalf("sanitized request leaked %q:\n%s", forbidden, combined)
		}
	}

	// Structured tool-call input: key-context redaction and pass-through of
	// non-secret values.
	input := got.Messages[0].ToolCalls[0].Input
	if input["api_key"] != "[REDACTED]" {
		t.Fatalf("tool call api_key = %#v, want redacted marker", input["api_key"])
	}
	credentials := asMap("sanitized credentials", input["credentials"])
	if credentials["value"] != "[REDACTED]" {
		t.Fatalf("nested credential value = %#v, want redacted marker", credentials["value"])
	}
	if credentials["metadata"] != "safe metadata" {
		t.Fatalf("non-value credential metadata was changed: %#v", credentials["metadata"])
	}
	safeValues := asSlice("sanitized safe values", input["safe"], 2)
	if safeValues[0] != "keep-me" {
		t.Fatalf("safe non-secret value was changed: %#v", safeValues[0])
	}
	if text, ok := safeValues[1].(string); !ok {
		t.Fatalf("safe value changed type: %#v", safeValues[1])
	} else if strings.Contains(text, "sk-provider-token") {
		t.Fatalf("provider-shaped token in safe field was not redacted: %#v", safeValues[1])
	}

	// Tool schema: structural keys survive, value-carrier slots under a
	// sensitive property are redacted.
	properties := asMap("sanitized schema properties", got.Tools[0].InputSchema["properties"])
	apiKeyProperty := asMap("sanitized api_key property", properties["api_key"])
	if apiKeyProperty["type"] != "string" {
		t.Fatalf("schema type was changed: %#v", apiKeyProperty["type"])
	}
	if apiKeyProperty["description"] != "Provider API key" {
		t.Fatalf("schema description was changed: %#v", apiKeyProperty["description"])
	}
	if apiKeyProperty["default"] != "[REDACTED]" {
		t.Fatalf("schema default = %#v, want redacted marker", apiKeyProperty["default"])
	}
	examples := asSlice("sanitized schema examples", apiKeyProperty["examples"], 1)
	if examples[0] != "[REDACTED]" {
		t.Fatalf("schema example = %#v, want redacted marker", examples[0])
	}

	// The caller's request must not be mutated. Check the values the sanitizer
	// actually rewrites; "type" alone would pass even if nested maps were
	// edited in place.
	origInput := req.Messages[0].ToolCalls[0].Input
	if origInput["api_key"] != "plain-tool-key" {
		t.Fatalf("sanitizer mutated original tool call input: %#v", origInput["api_key"])
	}
	origCredentials := asMap("original credentials", origInput["credentials"])
	if origCredentials["value"] != "nested-credential-value" {
		t.Fatalf("sanitizer mutated original nested credential: %#v", origCredentials["value"])
	}
	if req.Tools[0].InputSchema["type"] != "object" {
		t.Fatalf("sanitizer mutated original tool schema: %#v", req.Tools[0].InputSchema["type"])
	}
	origProperties := asMap("original schema properties", req.Tools[0].InputSchema["properties"])
	origAPIKeyProperty := asMap("original api_key property", origProperties["api_key"])
	if origAPIKeyProperty["default"] != "schema-default-secret" {
		t.Fatalf("sanitizer mutated original schema default: %#v", origAPIKeyProperty["default"])
	}
	origExamples := asSlice("original schema examples", origAPIKeyProperty["examples"], 1)
	if origExamples[0] != "schema-example-secret" {
		t.Fatalf("sanitizer mutated original schema example: %#v", origExamples[0])
	}
}

View file

@ -4,6 +4,7 @@ import (
"strings"
"github.com/rcourtman/pulse-go-rewrite/internal/ai/providers"
"github.com/rcourtman/pulse-go-rewrite/internal/ai/safety"
"github.com/rcourtman/pulse-go-rewrite/internal/config"
"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
)
@ -20,17 +21,18 @@ type allUnifiedResourceProvider interface {
// RequestSanitizerForModel returns a sanitizer for non-local model traffic.
// It is intentionally applied at the final provider transport boundary so
// later tool-result turns cannot bypass the resource-policy posture exported
// to the operator-facing Data Handling surface.
// operator-entered prompts, handoff text, tool-result turns, and provider-bound
// tool schemas cannot bypass prompt-secret or resource-policy sanitation.
func RequestSanitizerForModel(model string, provider UnifiedResourceProvider) func(providers.ChatRequest) providers.ChatRequest {
if !ModelUsesExternalProvider(model) || provider == nil {
if !ModelUsesExternalProvider(model) {
return nil
}
resources := resourcePolicySanitizerResources(provider)
if len(resources) == 0 {
return nil
}
return func(req providers.ChatRequest) providers.ChatRequest {
req = sanitizeProviderRequestForPromptSecrets(req)
if len(resources) == 0 {
return req
}
return sanitizeProviderRequestForResources(req, resources)
}
}
@ -191,3 +193,109 @@ func sanitizeResourcePolicyValue(value interface{}, resources []unifiedresources
return value
}
}
// sanitizeProviderRequestForPromptSecrets returns a copy of req whose system
// prompt, messages and tools have been passed through prompt-secret redaction.
// Non-empty Messages and Tools slices are rebuilt so the caller's slices are
// not written to; empty slices are left as they are.
func sanitizeProviderRequestForPromptSecrets(req providers.ChatRequest) providers.ChatRequest {
	req.System = sanitizePromptSecretText(req.System)

	if n := len(req.Messages); n > 0 {
		messages := make([]providers.Message, n)
		for i, msg := range req.Messages {
			messages[i] = sanitizeProviderMessageForPromptSecrets(msg)
		}
		req.Messages = messages
	}

	if n := len(req.Tools); n > 0 {
		tools := make([]providers.Tool, n)
		for i, tool := range req.Tools {
			tools[i] = sanitizeProviderToolForPromptSecrets(tool)
		}
		req.Tools = tools
	}

	return req
}
// sanitizeProviderMessageForPromptSecrets redacts secrets from a single
// message: its content, its reasoning content, the content of its tool result
// (if any) and the input of each tool call. The tool result and the tool-call
// slice are copied before being modified.
func sanitizeProviderMessageForPromptSecrets(msg providers.Message) providers.Message {
	msg.Content = sanitizePromptSecretText(msg.Content)
	msg.ReasoningContent = sanitizePromptSecretText(msg.ReasoningContent)

	if original := msg.ToolResult; original != nil {
		// Work on a copy so the pointed-to result shared with the caller stays intact.
		copied := *original
		copied.Content = sanitizePromptSecretText(copied.Content)
		msg.ToolResult = &copied
	}

	if n := len(msg.ToolCalls); n > 0 {
		calls := make([]providers.ToolCall, n)
		copy(calls, msg.ToolCalls)
		for i := range calls {
			calls[i].Input = sanitizePromptSecretMap(calls[i].Input, false)
		}
		msg.ToolCalls = calls
	}

	return msg
}
// sanitizeProviderToolForPromptSecrets returns tool with its description and
// input schema passed through prompt-secret redaction. The schema is walked
// with no sensitive parent context at the top level.
func sanitizeProviderToolForPromptSecrets(tool providers.Tool) providers.Tool {
	sanitized := tool
	sanitized.InputSchema = sanitizePromptSecretMap(tool.InputSchema, false)
	sanitized.Description = sanitizePromptSecretText(tool.Description)
	return sanitized
}
// sanitizePromptSecretText runs free text through safety.RedactSensitiveText
// and returns only the redacted text.
func sanitizePromptSecretText(value string) string {
	// The redaction count is not needed at this boundary.
	text, _ := safety.RedactSensitiveText(value)
	return text
}
// sanitizePromptSecretSensitiveValue runs a value already known to sit in a
// sensitive slot through safety.RedactSensitiveValue and returns only the
// redacted text.
func sanitizePromptSecretSensitiveValue(value string) string {
	// The redaction count is not needed at this boundary.
	text, _ := safety.RedactSensitiveValue(value)
	return text
}
// sanitizePromptSecretMap returns a new map with every value sanitized.
// A value is treated as sensitive when its key is a sensitive field name, or
// when the enclosing field was sensitive (sensitiveParent) and the key is a
// value-carrier name such as "default" or "examples". Nil and empty maps are
// returned unchanged.
func sanitizePromptSecretMap(values map[string]interface{}, sensitiveParent bool) map[string]interface{} {
	if len(values) == 0 {
		return values
	}
	out := make(map[string]interface{}, len(values))
	for name, raw := range values {
		sensitive := safety.IsSensitiveFieldName(name)
		if !sensitive && sensitiveParent {
			sensitive = safety.IsSensitiveValueCarrierFieldName(name)
		}
		out[name] = sanitizePromptSecretValue(name, raw, sensitive)
	}
	return out
}
func sanitizePromptSecretValue(fieldName string, value interface{}, sensitiveValue bool) interface{} {
switch typed := value.(type) {
case string:
if sensitiveValue {
return sanitizePromptSecretSensitiveValue(typed)
}
return sanitizePromptSecretText(typed)
case []string:
out := make([]string, len(typed))
for i := range typed {
if sensitiveValue {
out[i] = sanitizePromptSecretSensitiveValue(typed[i])
continue
}
out[i] = sanitizePromptSecretText(typed[i])
}
return out
case []interface{}:
out := make([]interface{}, len(typed))
for i := range typed {
out[i] = sanitizePromptSecretValue(fieldName, typed[i], sensitiveValue)
}
return out
case map[string]interface{}:
return sanitizePromptSecretMap(typed, sensitiveValue)
case map[string]string:
out := make(map[string]string, len(typed))
for key, nested := range typed {
keyIsSensitive := safety.IsSensitiveFieldName(key)
valueIsSensitive := keyIsSensitive || sensitiveValue && safety.IsSensitiveValueCarrierFieldName(key)
if valueIsSensitive {
out[key] = sanitizePromptSecretSensitiveValue(nested)
continue
}
out[key] = sanitizePromptSecretText(nested)
}
return out
default:
return value
}
}

View file

@ -11,16 +11,26 @@ var (
pemEndRE = regexp.MustCompile(`(?m)^-----END [A-Z0-9 ][A-Z0-9 ]+-----\s*$`)
// Common secret-bearing key/value patterns.
kvSecretRE = regexp.MustCompile(`(?i)\b(password|passwd|passphrase|secret|token|api[_-]?key|client[_-]?secret|private[_-]?key)\b\s*[:=]\s*(.+)$`)
kvSecretRE = regexp.MustCompile(`(?i)\b(password|passwd|passphrase|secret|token|api[_-]?key|client[_-]?secret|private[_-]?key|access[_-]?token|refresh[_-]?token)\b\s*[:=]\s*(.+)$`)
// Authorization bearer header.
bearerRE = regexp.MustCompile(`(?i)\bauthorization\s*:\s*bearer\s+([A-Za-z0-9\-._~+/]+=*)`)
// Structured, URL, and header forms that commonly appear in prompts,
// provider errors, tool schemas, and operator-entered handoff context.
quotedKVSecretRE = regexp.MustCompile(`(?i)("(?:password|passwd|passphrase|secret|token|api[_-]?key|apikey|client[_-]?secret|private[_-]?key|access[_-]?token|refresh[_-]?token|authorization|x-api-key|credential)"\s*:\s*")[^"]+`)
querySecretRE = regexp.MustCompile(`(?i)([?&](?:key|api[_-]?key|apikey|access[_-]?token|refresh[_-]?token|token|client[_-]?secret|secret)=)[^\s&"']+`)
bearerRE = regexp.MustCompile(`(?i)(\bauthorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`)
xAPIKeyHeaderRE = regexp.MustCompile(`(?i)(\bx-api-key\s*:\s*)[^\s,;]+`)
urlUserInfoRE = regexp.MustCompile(`(?i)(https?://)[^\s/@:]+:[^\s/@]+@`)
// Common token formats to reduce accidental leakage even when not in k=v form.
awsAccessKeyRE = regexp.MustCompile(`\b(AKIA|ASIA)[0-9A-Z]{16}\b`)
jwtRE = regexp.MustCompile(`\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b`)
openAIKeyRE = regexp.MustCompile(`\bsk-[A-Za-z0-9_-]{8,}\b`)
googleAPIKeyRE = regexp.MustCompile(`\bAIza[0-9A-Za-z_-]{10,}\b`)
githubTokenRE = regexp.MustCompile(`\bgh[opsur]_[A-Za-z0-9_]{10,}\b`)
)
// redactedSecretValue is the generic placeholder written in place of a secret
// value when the surrounding key, header name or URL prefix is kept.
const redactedSecretValue = "[REDACTED]"
// RedactSensitiveText removes likely-secret material from text outputs to reduce accidental
// key/token leakage through AI tool results. It is intentionally conservative: if a value
// looks sensitive, it will be replaced.
@ -62,20 +72,7 @@ func RedactSensitiveText(input string) (string, int) {
}
}
if bearerRE.MatchString(line) {
lines[i] = bearerRE.ReplaceAllString(line, "Authorization: Bearer [REDACTED]")
redactions++
continue
}
if awsAccessKeyRE.MatchString(line) {
lines[i] = awsAccessKeyRE.ReplaceAllString(line, "[REDACTED_AWS_ACCESS_KEY]")
redactions++
}
if jwtRE.MatchString(lines[i]) {
lines[i] = jwtRE.ReplaceAllString(lines[i], "[REDACTED_JWT]")
redactions++
}
lines[i], redactions = redactLineSecretPatterns(lines[i], redactions)
}
// Drop empty lines introduced by PEM redaction.
@ -88,3 +85,98 @@ func RedactSensitiveText(input string) (string, int) {
}
return strings.Join(outLines, "\n"), redactions
}
func redactLineSecretPatterns(line string, redactions int) (string, int) {
var count int
line, count = replaceAllCounting(quotedKVSecretRE, line, `${1}`+redactedSecretValue)
redactions += count
line, count = replaceAllCounting(querySecretRE, line, `${1}`+redactedSecretValue)
redactions += count
line, count = replaceAllCounting(bearerRE, line, `${1}`+redactedSecretValue)
redactions += count
line, count = replaceAllCounting(xAPIKeyHeaderRE, line, `${1}`+redactedSecretValue)
redactions += count
line, count = replaceAllCounting(urlUserInfoRE, line, `${1}`+redactedSecretValue+"@")
redactions += count
line, count = replaceAllCounting(awsAccessKeyRE, line, "[REDACTED_AWS_ACCESS_KEY]")
redactions += count
line, count = replaceAllCounting(jwtRE, line, "[REDACTED_JWT]")
redactions += count
line, count = replaceAllCounting(openAIKeyRE, line, "[REDACTED_PROVIDER_KEY]")
redactions += count
line, count = replaceAllCounting(googleAPIKeyRE, line, "[REDACTED_PROVIDER_KEY]")
redactions += count
line, count = replaceAllCounting(githubTokenRE, line, "[REDACTED_PROVIDER_TOKEN]")
redactions += count
return line, redactions
}
// replaceAllCounting replaces every match of re in input with replacement
// (using regexp template expansion, so ${1}-style references work) and reports
// how many matches were found. When nothing matches, input is returned as is
// with a count of zero.
func replaceAllCounting(re *regexp.Regexp, input string, replacement string) (string, int) {
	hits := len(re.FindAllStringIndex(input, -1))
	if hits == 0 {
		return input, 0
	}
	return re.ReplaceAllString(input, replacement), hits
}
// RedactSensitiveFieldValue redacts input using the name of the field it came
// from. When fieldName is a sensitive field name the value is handled by
// RedactSensitiveValue, which also covers bland values that carry no
// recognizable secret shape (for example a tool-call input named api_key);
// otherwise the value only receives the pattern-based pass of
// RedactSensitiveText.
func RedactSensitiveFieldValue(fieldName string, input string) (string, int) {
	if IsSensitiveFieldName(fieldName) {
		return RedactSensitiveValue(input)
	}
	return RedactSensitiveText(input)
}
// RedactSensitiveValue redacts a structured value that the caller already
// knows belongs to a sensitive field or schema value slot.
//
// The input first goes through RedactSensitiveText. If the result is blank or
// already exactly the generic marker it is returned with that pass's count;
// any other result is replaced wholesale by the marker and counted as one
// additional redaction.
func RedactSensitiveValue(input string) (string, int) {
	text, n := RedactSensitiveText(input)
	switch {
	case strings.TrimSpace(text) == "", text == redactedSecretValue:
		return text, n
	default:
		return redactedSecretValue, n + 1
	}
}
// IsSensitiveFieldName reports whether a structured key usually carries a
// credential or secret-bearing value. The name is compared after
// normalizedFieldName has reduced it to lowercase letters and digits, so
// "api_key", "API-Key" and "apiKey" are all treated alike.
func IsSensitiveFieldName(name string) bool {
	switch normalizedFieldName(name) {
	case "password", "passwd", "passphrase", "secret", "token", "apikey", "clientsecret",
		"privatekey", "accesstoken", "refreshtoken", "authorization", "xapikey", "credential",
		"credentials":
		return true
	}
	// Anything else, including a name that normalizes to "", is not sensitive.
	return false
}
// IsSensitiveValueCarrierFieldName reports whether a nested field commonly
// carries an example, default, or literal value for a sensitive schema
// property. The name is compared in its normalizedFieldName form.
func IsSensitiveValueCarrierFieldName(name string) bool {
	switch normalizedFieldName(name) {
	case "value", "values", "default", "defaults", "example", "examples", "enum", "const":
		return true
	}
	return false
}
// normalizedFieldName lowercases name and keeps only the characters a-z and
// 0-9, dropping whitespace, separators and everything else, so differently
// punctuated spellings of the same key compare equal.
func normalizedFieldName(name string) string {
	keepAlnum := func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9':
			return r
		}
		// A negative result makes strings.Map drop the rune.
		return -1
	}
	return strings.Map(keepAlnum, strings.ToLower(name))
}

View file

@ -0,0 +1,63 @@
package safety
import (
"strings"
"testing"
)
// TestRedactSensitiveTextCoversStructuredPromptSecrets feeds RedactSensitiveText
// one line per supported secret form (key/value, JSON, query string, bearer and
// x-api-key headers, URL userinfo, GitHub token) and checks that every secret
// is gone while the surrounding context is still present.
func TestRedactSensitiveTextCoversStructuredPromptSecrets(t *testing.T) {
	lines := []string{
		`password: hunter2`,
		`{"api_key":"plain-json-secret","access_token":"access-token-value"}`,
		`GET https://example.test/v1?key=AIzaSySecretTokenValue&region=us`,
		`Authorization: Bearer sk-live-secret-token`,
		`x-api-key: sk-provider-secret`,
		`https://operator:password@example.test/v1`,
		`github=ghp_abcdefghijklmnopqrstuvwxyz`,
	}

	redacted, count := RedactSensitiveText(strings.Join(lines, "\n"))
	if count == 0 {
		t.Fatal("expected redactions")
	}

	// Secrets that must not appear anywhere in the output.
	mustNotContain := []string{
		"hunter2",
		"plain-json-secret",
		"access-token-value",
		"AIzaSySecretTokenValue",
		"sk-live-secret-token",
		"sk-provider-secret",
		"operator:password@",
		"ghp_abcdefghijklmnopqrstuvwxyz",
	}
	for _, secret := range mustNotContain {
		if strings.Contains(redacted, secret) {
			t.Fatalf("redacted text leaked %q:\n%s", secret, redacted)
		}
	}

	// Context that must be kept so the redacted text stays readable.
	mustContain := []string{
		"password:",
		`"api_key":"[REDACTED]"`,
		"Authorization: Bearer [REDACTED]",
		"x-api-key: [REDACTED]",
		"https://[REDACTED]@example.test/v1",
	}
	for _, context := range mustContain {
		if strings.Contains(redacted, context) {
			continue
		}
		t.Fatalf("redacted text missing retained context %q:\n%s", context, redacted)
	}
}
// TestRedactSensitiveFieldValueUsesKeyContext checks that a value with no
// secret-like shape is redacted when it sits under a sensitive field name and
// is left untouched, with a zero count, under a non-sensitive one.
func TestRedactSensitiveFieldValueUsesKeyContext(t *testing.T) {
	const bland = "plain-value-without-token-shape"

	if got, n := RedactSensitiveFieldValue("client_secret", bland); got != "[REDACTED]" {
		t.Fatalf("redacted value = %q, want marker", got)
	} else if n == 0 {
		t.Fatal("expected key-context redaction count")
	}

	got, n := RedactSensitiveFieldValue("display_name", bland)
	if n != 0 || got != "plain-value-without-token-shape" {
		t.Fatalf("non-sensitive field redaction = %q count=%d", got, n)
	}
}