mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-06 08:21:50 +00:00
fix: sanitize surrogates (#25934)
This commit is contained in:
parent
837cc92586
commit
6409aceb1a
2 changed files with 164 additions and 5 deletions
|
|
@ -1,4 +1,4 @@
|
|||
import type { ModelMessage } from "ai"
|
||||
import type { ModelMessage, ToolResultPart } from "ai"
|
||||
import { mergeDeep, unique } from "remeda"
|
||||
import type { JSONSchema7 } from "@ai-sdk/provider"
|
||||
import type { JSONSchema } from "zod/v4/core"
|
||||
|
|
@ -19,6 +19,10 @@ function mimeToModality(mime: string): Modality | undefined {
|
|||
|
||||
export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
|
||||
|
||||
/**
 * Replaces every unpaired UTF-16 surrogate code unit in `content` with
 * U+FFFD (REPLACEMENT CHARACTER).
 *
 * Well-formed surrogate pairs (a high surrogate immediately followed by a
 * low surrogate) are left untouched, so valid astral characters such as
 * emoji survive unchanged.
 *
 * @param content - The string to sanitize.
 * @returns A copy of `content` with each lone surrogate replaced by U+FFFD.
 */
export function sanitizeSurrogates(content: string): string {
  // First alternative: a high surrogate NOT followed by a low surrogate.
  // Second alternative: a low surrogate NOT preceded by a high surrogate.
  // The pattern deliberately has no `u` flag so it matches individual
  // UTF-16 code units rather than whole code points.
  return content.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, "\uFFFD")
}
|
||||
|
||||
// Maps npm package to the key the AI SDK expects for providerOptions
|
||||
function sdkKey(npm: string): string | undefined {
|
||||
switch (npm) {
|
||||
|
|
@ -52,11 +56,74 @@ function sdkKey(npm: string): string | undefined {
|
|||
return undefined
|
||||
}
|
||||
|
||||
// TODO: refactor this function; the current implementation is inefficient
|
||||
function normalizeMessages(
|
||||
msgs: ModelMessage[],
|
||||
model: Provider.Model,
|
||||
_options: Record<string, unknown>,
|
||||
): ModelMessage[] {
|
||||
const sanitizeToolResultOutput = (content: ToolResultPart) => {
|
||||
if (content.output.type === "text" || content.output.type === "error-text") {
|
||||
content.output.value = sanitizeSurrogates(content.output.value)
|
||||
}
|
||||
if (content.output.type === "content") {
|
||||
content.output.value = content.output.value.map((item) => {
|
||||
if (item.type === "text") {
|
||||
item.text = sanitizeSurrogates(item.text)
|
||||
}
|
||||
return item
|
||||
})
|
||||
}
|
||||
return content
|
||||
}
|
||||
|
||||
msgs = msgs.map((msg) => {
|
||||
switch (msg.role) {
|
||||
case "tool":
|
||||
if (!Array.isArray(msg.content)) return msg
|
||||
msg.content = msg.content.map((content) => {
|
||||
if (content.type === "tool-result") {
|
||||
return sanitizeToolResultOutput(content)
|
||||
}
|
||||
return content
|
||||
})
|
||||
return msg
|
||||
|
||||
case "system":
|
||||
msg.content = sanitizeSurrogates(msg.content)
|
||||
return msg
|
||||
|
||||
case "user":
|
||||
if (typeof msg.content === "string") {
|
||||
msg.content = sanitizeSurrogates(msg.content)
|
||||
} else {
|
||||
msg.content = msg.content.map((content) => {
|
||||
if (content.type === "text") {
|
||||
content.text = sanitizeSurrogates(content.text)
|
||||
}
|
||||
return content
|
||||
})
|
||||
}
|
||||
return msg
|
||||
|
||||
case "assistant":
|
||||
if (typeof msg.content === "string") {
|
||||
msg.content = sanitizeSurrogates(msg.content)
|
||||
} else {
|
||||
msg.content = msg.content.map((content) => {
|
||||
if (content.type === "text" || content.type === "reasoning") {
|
||||
content.text = sanitizeSurrogates(content.text)
|
||||
}
|
||||
if (content.type === "tool-result") {
|
||||
return sanitizeToolResultOutput(content)
|
||||
}
|
||||
return content
|
||||
})
|
||||
}
|
||||
return msg
|
||||
}
|
||||
})
|
||||
|
||||
// Anthropic rejects messages with empty content - filter out empty string messages
|
||||
// and remove empty text/reasoning parts from array content
|
||||
if (model.api.npm === "@ai-sdk/anthropic") {
|
||||
|
|
|
|||
|
|
@ -1123,6 +1123,98 @@ describe("ProviderTransform.message - DeepSeek reasoning content", () => {
|
|||
})
|
||||
})
|
||||
|
||||
// Verifies that ProviderTransform.message replaces lone UTF-16 surrogates with
// U+FFFD in every text-bearing field while leaving valid pairs and non-text
// parts untouched.
describe("ProviderTransform.message - surrogate sanitization", () => {
  // Minimal model fixture; cast to `any` because only a subset of the real
  // model shape is filled in.
  const model = {
    id: "test/test-model",
    providerID: "test",
    api: {
      id: "test-model",
      url: "https://api.test.com",
      npm: "@ai-sdk/openai-compatible",
    },
    name: "Test Model",
    capabilities: {
      temperature: true,
      reasoning: true,
      attachment: true,
      toolcall: true,
      input: { text: true, audio: false, image: true, video: false, pdf: false },
      output: { text: true, audio: false, image: false, video: false, pdf: false },
      interleaved: false,
    },
    cost: { input: 0.001, output: 0.002, cache: { read: 0.0001, write: 0.0002 } },
    limit: { context: 128000, output: 8192 },
    status: "active",
    options: {},
    headers: {},
  } as any

  test("replaces lone surrogates in model-visible text", () => {
    // An unpaired high surrogate: the first half of the rocket emoji.
    const lone = "\uD83D"
    // A well-formed surrogate pair that must pass through unchanged.
    const valid = "🚀"
    // U+FFFD REPLACEMENT CHARACTER, written as an escape so the expectation
    // cannot be corrupted by an encoding round-trip.
    const sanitized = "\uFFFD"
    const text = (label: string) => `${label} ${lone} and ${valid}`
    const expected = (label: string) => `${label} ${sanitized} and ${valid}`
    const msgs = [
      { role: "system", content: text("system") },
      { role: "user", content: text("user string") },
      {
        role: "user",
        content: [
          { type: "text", text: text("user text") },
          { type: "image", image: "data:image/png;base64,abcd" },
        ],
      },
      { role: "assistant", content: text("assistant string") },
      {
        role: "assistant",
        content: [
          { type: "text", text: text("assistant text") },
          { type: "reasoning", text: text("assistant reasoning") },
          { type: "tool-call", toolCallId: "call-1", toolName: "Read", input: { filePath: ".opencode/tool/emoji.ts" } },
          { type: "tool-result", toolCallId: "call-2", toolName: "Read", output: { type: "text", value: text("assistant tool text") } },
          { type: "tool-result", toolCallId: "call-3", toolName: "Read", output: { type: "error-text", value: text("assistant tool error") } },
          {
            type: "tool-result",
            toolCallId: "call-4",
            toolName: "Read",
            output: { type: "content", value: [{ type: "text", text: text("assistant tool content") }] },
          },
        ],
      },
      {
        role: "tool",
        content: [
          { type: "tool-result", toolCallId: "call-5", toolName: "Read", output: { type: "text", value: text("tool text") } },
          { type: "tool-result", toolCallId: "call-6", toolName: "Read", output: { type: "error-text", value: text("tool error") } },
          {
            type: "tool-result",
            toolCallId: "call-7",
            toolName: "Read",
            output: { type: "content", value: [{ type: "text", text: text("tool content") }] },
          },
        ],
      },
    ] as any[]

    const result = ProviderTransform.message(msgs, model, {}) as any[]

    // String-valued and text-part content for system / user / assistant.
    expect(result[0].content).toBe(expected("system"))
    expect(result[1].content).toBe(expected("user string"))
    expect(result[2].content[0].text).toBe(expected("user text"))
    expect(result[3].content).toBe(expected("assistant string"))
    expect(result[4].content[0].text).toBe(expected("assistant text"))
    expect(result[4].content[1].text).toBe(expected("assistant reasoning"))
    // Index 2 of the assistant parts is the tool-call; indices 3-5 are the
    // tool-results in text, error-text and content form.
    expect(result[4].content[3].output.value).toBe(expected("assistant tool text"))
    expect(result[4].content[4].output.value).toBe(expected("assistant tool error"))
    expect(result[4].content[5].output.value[0].text).toBe(expected("assistant tool content"))
    // Tool-role results in the same three output forms.
    expect(result[5].content[0].output.value).toBe(expected("tool text"))
    expect(result[5].content[1].output.value).toBe(expected("tool error"))
    expect(result[5].content[2].output.value[0].text).toBe(expected("tool content"))
    // Non-text parts must be passed through unmodified.
    expect(result[2].content[1]).toEqual({ type: "image", image: "data:image/png;base64,abcd" })
  })
})
|
||||
|
||||
describe("ProviderTransform.message - empty image handling", () => {
|
||||
const mockModel = {
|
||||
id: "anthropic/claude-3-5-sonnet",
|
||||
|
|
@ -1993,7 +2085,7 @@ describe("ProviderTransform.message - bedrock caching with non-bedrock providerI
|
|||
const msgs = [
|
||||
{
|
||||
role: "system",
|
||||
content: [{ type: "text", text: "You are a helpful assistant" }],
|
||||
content: "You are a helpful assistant",
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
|
|
@ -2007,7 +2099,7 @@ describe("ProviderTransform.message - bedrock caching with non-bedrock providerI
|
|||
expect(result[0].providerOptions?.bedrock).toEqual({
|
||||
cachePoint: { type: "default" },
|
||||
})
|
||||
expect(result[0].content[0].providerOptions?.bedrock).toBeUndefined()
|
||||
expect(result[0].content).toBe("You are a helpful assistant")
|
||||
})
|
||||
})
|
||||
|
||||
|
|
@ -2044,7 +2136,7 @@ describe("ProviderTransform.message - cache control on gateway", () => {
|
|||
const msgs = [
|
||||
{
|
||||
role: "system",
|
||||
content: [{ type: "text", text: "You are a helpful assistant" }],
|
||||
content: "You are a helpful assistant",
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
|
|
@ -2054,7 +2146,7 @@ describe("ProviderTransform.message - cache control on gateway", () => {
|
|||
|
||||
const result = ProviderTransform.message(msgs, model, {}) as any[]
|
||||
|
||||
expect(result[0].content[0].providerOptions).toBeUndefined()
|
||||
expect(result[0].content).toBe("You are a helpful assistant")
|
||||
expect(result[0].providerOptions).toBeUndefined()
|
||||
})
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue