Fix Gemini/Gemma streaming, autocompact, and tool schema sanitization

- gemini.util.ts: flush pendingContent on stream end, fixing lost text for
  Gemma models with thinking but no thoughtSignature (e.g. gemma-4-31b-it);
  map Gemini 400 context-too-long errors to finish_reason
  "model_context_window_exceeded" to trigger Claude Code autocompact;
  remove temporary debug logs

- anthropic.transformer.ts: pass through model_context_window_exceeded as
  stop_reason in both streaming and non-streaming paths

- schema.ts (new): sanitizeJsonSchema strips unknown fields via whitelist and
  removes invalid format/enum/uri combinations; fixes bug where property
  schemas bypassed the whitelist; normalizeToolParameters wraps it for tools

- groq.transformer.ts, mistral.util.ts: use normalizeToolParameters for full
  schema sanitization instead of only deleting $schema

- gemini.transformer.ts: resolve model name from context before URL build

- vertex-gemini.transformer.ts: fall back to provider.model when request.model
  is empty
This commit is contained in:
o.akimov 2026-04-25 23:35:20 +02:00
parent 3bb5323444
commit 41b74d65a0
7 changed files with 454 additions and 121 deletions

View file

@ -884,6 +884,8 @@ export class AnthropicTransformer implements Transformer {
length: "max_tokens",
tool_calls: "tool_use",
content_filter: "stop_sequence",
model_context_window_exceeded:
"model_context_window_exceeded",
};
const anthropicStopReason =
@ -1039,6 +1041,8 @@ export class AnthropicTransformer implements Transformer {
? "tool_use"
: choice.finish_reason === "content_filter"
? "stop_sequence"
: choice.finish_reason === "model_context_window_exceeded"
? "model_context_window_exceeded"
: "end_turn",
stop_sequence: null,
usage: {

View file

@ -13,14 +13,15 @@ export class GeminiTransformer implements Transformer {
async transformRequestIn(
request: UnifiedChatRequest,
provider: LLMProvider
provider: LLMProvider,
context: any
): Promise<Record<string, any>> {
const model = context?.req?.model || request.model || provider.model || "";
return {
body: buildRequestBody(request),
config: {
url: new URL(
`./${request.model}:${
request.stream ? "streamGenerateContent?alt=sse" : "generateContent"
`./${model}:${request.stream ? "streamGenerateContent?alt=sse" : "generateContent"
}`,
provider.baseUrl
),

View file

@ -2,6 +2,7 @@ import { UnifiedChatRequest } from "@/types/llm";
import { Transformer } from "../types/transformer";
import { createSSEStreamReader, StreamContext, encodeSSEData, encodeSSELine } from "../utils/stream";
import { stripMessagesCacheControl } from "../utils/cacheControl";
import { normalizeToolParameters } from "../utils/schema";
import { v4 as uuidv4 } from "uuid";
export class GroqTransformer implements Transformer {
@ -12,7 +13,9 @@ export class GroqTransformer implements Transformer {
if (Array.isArray(request.tools)) {
request.tools.forEach(tool => {
delete tool.function.parameters.$schema;
if (tool?.function?.parameters) {
tool.function.parameters = normalizeToolParameters(tool.function.parameters);
}
});
}
return request;

View file

@ -58,7 +58,7 @@ export class VertexGeminiTransformer implements Transformer {
body: buildRequestBody(request),
config: {
url: new URL(
`./v1beta1/projects/${projectId}/locations/${location}/publishers/google/models/${request.model}:${request.stream ? "streamGenerateContent" : "generateContent"}`,
`./v1beta1/projects/${projectId}/locations/${location}/publishers/google/models/${request.model || provider.model || ""}:${request.stream ? "streamGenerateContent" : "generateContent"}`,
provider.baseUrl.endsWith('/') ? provider.baseUrl : provider.baseUrl + '/' || `https://${location}-aiplatform.googleapis.com`
),
headers: {

View file

@ -1,67 +1,6 @@
import { UnifiedChatRequest, UnifiedMessage } from "../types/llm";
import { Content, ContentListUnion, Part, ToolListUnion } from "@google/genai";
/**
 * Recursively prunes a JSON-schema-like value IN PLACE so that only the
 * whitelisted schema keywords remain.
 *
 * Rules applied to every schema object:
 *  - keys that are not in the whitelist are deleted;
 *  - `enum` is deleted unless `type` is exactly "string";
 *  - on string schemas, `format` is deleted unless it is "enum" or "date-time".
 *
 * A `properties` value is a map from caller-chosen property names to
 * schemas, not a schema itself. Its keys are therefore never whitelisted and
 * the enum/format rules are never applied to the map; every value in it is
 * cleaned as a schema. This keeps properties that happen to be called
 * "enum", "format" or "properties" intact while still sanitizing their
 * schemas.
 *
 * @param obj     Value to clean; non-objects and null are ignored.
 * @param keyName Key under which `obj` was found in its parent. Only the
 *                value "properties" has a special meaning (see above).
 */
export function cleanupParameters(obj: any, keyName?: string): void {
  if (!obj || typeof obj !== "object") {
    return;
  }

  if (Array.isArray(obj)) {
    obj.forEach((item) => {
      cleanupParameters(item);
    });
    return;
  }

  if (keyName === "properties") {
    // Property-name map: leave the names alone and clean each schema.
    // Deliberately do not forward the property name as keyName, otherwise a
    // property called "properties" would itself be mistaken for a map.
    Object.keys(obj).forEach((propertyName) => {
      cleanupParameters(obj[propertyName]);
    });
    return;
  }

  const validFields = new Set([
    "type",
    "format",
    "title",
    "description",
    "nullable",
    "enum",
    "maxItems",
    "minItems",
    "properties",
    "required",
    "minProperties",
    "maxProperties",
    "minLength",
    "maxLength",
    "pattern",
    "example",
    "anyOf",
    "propertyOrdering",
    "default",
    "items",
    "minimum",
    "maximum",
  ]);

  // Object.keys() takes a snapshot, so deleting while iterating is safe.
  Object.keys(obj).forEach((key) => {
    if (!validFields.has(key)) {
      delete obj[key];
    }
  });

  if (obj.enum && obj.type !== "string") {
    delete obj.enum;
  }

  if (
    obj.type === "string" &&
    obj.format &&
    !["enum", "date-time"].includes(obj.format)
  ) {
    delete obj.format;
  }

  Object.keys(obj).forEach((key) => {
    cleanupParameters(obj[key], key);
  });
}
import { sanitizeJsonSchema } from "./schema";
// Type enum equivalent in JavaScript
const Type = {
@ -209,48 +148,47 @@ export function tTool(tool: any): any {
if (tool.functionDeclarations) {
for (const functionDeclaration of tool.functionDeclarations) {
if (functionDeclaration.parameters) {
if (!Object.keys(functionDeclaration.parameters).includes("$schema")) {
functionDeclaration.parameters = processJsonSchema(
functionDeclaration.parameters
);
} else {
if (!functionDeclaration.parametersJsonSchema) {
functionDeclaration.parametersJsonSchema =
functionDeclaration.parameters;
delete functionDeclaration.parameters;
}
}
const sanitized = sanitizeJsonSchema(functionDeclaration.parameters);
functionDeclaration.parameters = processJsonSchema(sanitized);
}
if (functionDeclaration.response) {
if (!Object.keys(functionDeclaration.response).includes("$schema")) {
functionDeclaration.response = processJsonSchema(
functionDeclaration.response
);
} else {
if (!functionDeclaration.responseJsonSchema) {
functionDeclaration.responseJsonSchema =
functionDeclaration.response;
delete functionDeclaration.response;
}
}
const sanitized = sanitizeJsonSchema(functionDeclaration.response);
functionDeclaration.response = processJsonSchema(sanitized);
}
}
}
return tool;
}
/**
 * Project a tool definition onto the common `{ name, description, parameters }`
 * shape. A tool carrying a truthy `function.name` is read from its `function`
 * object; anything else is read from the top-level `name` / `description` /
 * `input_schema` fields.
 */
function normalizeTool(tool: any): { name: string; description: string; parameters: any } {
  const fn = tool.function;
  if (fn?.name) {
    const { name, description, parameters } = fn;
    return { name, description, parameters };
  }
  const { name, description, input_schema: parameters } = tool;
  return { name, description, parameters };
}
/**
 * Coerce a tool name into the form this module sends to Gemini:
 * every character outside [a-zA-Z0-9_.:\-] becomes "_", a leading "_" is
 * added when the first character is not a letter or underscore, and the
 * result is cut to at most 128 characters. A falsy name yields
 * "unnamed_function".
 */
function sanitizeGeminiFunctionName(name: string): string {
  if (!name) return "unnamed_function";

  const cleaned = name.replace(/[^a-zA-Z0-9_.:\-]/g, "_");
  const needsPrefix = /^[^a-zA-Z_]/.test(cleaned);
  const candidate = needsPrefix ? `_${cleaned}` : cleaned;

  return candidate.slice(0, 128);
}
export function buildRequestBody(
request: UnifiedChatRequest
): Record<string, any> {
const tools = [];
const functionDeclarations = request.tools
?.filter((tool) => tool.function.name !== "web_search")
?.map((tool) => {
return {
name: tool.function.name,
description: tool.function.description,
parametersJsonSchema: tool.function.parameters,
};
const requestTools = request.tools || [];
const functionDeclarations = requestTools
.filter((tool) => normalizeTool(tool).name !== "web_search")
.map((tool) => {
const { name, description, parameters } = normalizeTool(tool);
return { name: sanitizeGeminiFunctionName(name), description, parameters };
});
if (functionDeclarations?.length) {
tools.push(
@ -259,8 +197,8 @@ export function buildRequestBody(
})
);
}
const webSearch = request.tools?.find(
(tool) => tool.function.name === "web_search"
const webSearch = requestTools.find(
(tool) => normalizeTool(tool).name === "web_search"
);
if (webSearch) {
tools.push({
@ -269,20 +207,75 @@ export function buildRequestBody(
}
const contents: any[] = [];
const toolResponses = request.messages.filter((item) => item.role === "tool");
request.messages
.filter((item) => item.role !== "tool")
.forEach((message: UnifiedMessage) => {
let role: "user" | "model";
if (message.role === "assistant") {
role = "model";
} else if (["user", "system"].includes(message.role)) {
role = "user";
} else {
role = "user"; // Default to user if role is not recognized
const rawMessages = request.messages || [];
// Collect system instructions from request.system and system role messages
const systemTexts: string[] = [];
const extractText = (content: any): void => {
if (typeof content === "string") {
if (content) systemTexts.push(content);
} else if (Array.isArray(content)) {
for (const part of content) {
if (part?.type === "text" && part.text) systemTexts.push(part.text);
else if (typeof part === "string" && part) systemTexts.push(part);
}
const parts = [];
if (typeof message.content === "string") {
}
};
if (request.system) extractText(request.system);
for (const msg of rawMessages) {
if (msg.role === "system") extractText(msg.content);
}
const messages: UnifiedMessage[] = [];
for (const msg of rawMessages) {
if (msg.role === "tool" || msg.role === "system") continue;
const role = msg.role === "assistant" ? "assistant" : "user";
const lastMsg = messages[messages.length - 1];
if (lastMsg && lastMsg.role === role) {
const lastContent = lastMsg.content;
const currentContent = msg.content;
if (typeof lastContent === "string" && typeof currentContent === "string") {
lastMsg.content = lastContent + "\n" + currentContent;
} else if (Array.isArray(lastContent) && Array.isArray(currentContent)) {
lastMsg.content = [...lastContent, ...currentContent];
} else if (typeof lastContent === "string" && Array.isArray(currentContent)) {
lastMsg.content = [
{ type: "text", text: lastContent },
...currentContent,
];
} else if (Array.isArray(lastContent) && typeof currentContent === "string") {
lastMsg.content = [
...lastContent,
{ type: "text", text: currentContent },
];
} else {
// Fallback: just push as a new message and hope for the best,
// or force a role change. For Gemini, we MUST alternate.
// To force alternation, we can insert a dummy model message if needed,
// but merging is preferred.
messages.push({ ...msg, role });
}
} else {
messages.push({ ...msg, role });
}
}
const toolResponses = rawMessages.filter((item) => item.role === "tool");
messages.forEach((message: UnifiedMessage) => {
let role: "user" | "model";
if (message.role === "assistant") {
role = "model";
} else {
role = "user";
}
const parts = [];
if (typeof message.content === "string") {
const part: any = {
text: message.content,
};
@ -317,7 +310,8 @@ export function buildRequestBody(
};
}
}
})
return null;
}).filter(Boolean)
);
} else if (message.content && typeof message.content === "object") {
// Object like { text: "..." }
@ -410,11 +404,16 @@ export function buildRequestBody(
}
}
const body = {
contents,
const body: Record<string, any> = {
contents: contents.length ? contents : [{ role: "user", parts: [{ text: "" }] }],
tools: tools.length ? tools : undefined,
generationConfig,
};
if (systemTexts.length) {
body.systemInstruction = {
parts: [{ text: systemTexts.join("\n\n") }],
};
}
if (request.tool_choice) {
const toolConfig = {
@ -522,6 +521,54 @@ export async function transformResponseOut(
const jsonResponse: any = await response.json();
logger?.debug({ response: jsonResponse }, `${providerName} response:`);
if (response.status >= 400) {
const errorMessage: string = jsonResponse.error?.message || "";
const lowerMessage = errorMessage.toLowerCase();
const isContextExceeded = [
"user input too long",
"input too long",
"prompt is too long",
"exceeds the token limit",
"request payload size exceeds",
"context_length_exceeded",
].some((phrase) => lowerMessage.includes(phrase));
if (isContextExceeded) {
const res = {
id: `ctxexceeded_${Date.now()}`,
choices: [
{
finish_reason: "model_context_window_exceeded",
index: 0,
message: { content: "", role: "assistant" },
},
],
created: Math.floor(Date.now() / 1000),
model: "",
object: "chat.completion",
usage: { completion_tokens: 0, prompt_tokens: 0, total_tokens: 0 },
};
return new Response(JSON.stringify(res), {
status: 200,
headers: { "Content-Type": "application/json" },
});
}
return new Response(JSON.stringify(jsonResponse), {
status: response.status,
statusText: response.statusText,
headers: response.headers,
});
}
if (!jsonResponse.candidates || jsonResponse.candidates.length === 0) {
return new Response(JSON.stringify(jsonResponse), {
status: response.status,
statusText: response.statusText,
headers: response.headers,
});
}
// Extract thinking content from parts with thought: true
let thinkingContent = "";
let thinkingSignature = "";
@ -568,7 +615,7 @@ export async function transformResponseOut(
{
finish_reason:
(
jsonResponse.candidates[0].finishReason as string
jsonResponse.candidates[0]?.finishReason as string
)?.toLowerCase() || null,
index: 0,
message: {
@ -995,6 +1042,75 @@ export async function transformResponseOut(
contentSent = true;
}
}
// Flush buffered text on stream end (e.g. Gemma models with thinking but no thoughtSignature)
if (candidate.finishReason && pendingContent) {
if (!signatureSent && hasThinkingContent) {
const signatureChunk = {
choices: [
{
delta: {
role: "assistant",
content: null,
thinking: { signature: `ccr_${+new Date()}` },
},
finish_reason: null,
index: contentIndex,
logprobs: null,
},
],
created: parseInt(new Date().getTime() / 1000 + "", 10),
id: chunk.responseId || "",
model: chunk.modelVersion || "",
object: "chat.completion.chunk",
system_fingerprint: "fp_a49d71b8a1",
};
controller.enqueue(
encoder.encode(
`data: ${JSON.stringify(signatureChunk)}\n\n`
)
);
signatureSent = true;
contentIndex++;
}
const flushRes = {
choices: [
{
delta: {
role: "assistant",
content: pendingContent,
},
finish_reason: candidate.finishReason.toLowerCase(),
index: contentIndex,
logprobs: null,
},
],
created: parseInt(new Date().getTime() / 1000 + "", 10),
id: chunk.responseId || "",
model: chunk.modelVersion || "",
object: "chat.completion.chunk",
system_fingerprint: "fp_a49d71b8a1",
usage: {
completion_tokens:
chunk.usageMetadata?.candidatesTokenCount || 0,
prompt_tokens: chunk.usageMetadata?.promptTokenCount || 0,
prompt_tokens_details: {
cached_tokens:
chunk.usageMetadata?.cachedContentTokenCount || 0,
},
total_tokens: chunk.usageMetadata?.totalTokenCount || 0,
output_tokens_details: {
reasoning_tokens:
chunk.usageMetadata?.thoughtsTokenCount || 0,
},
},
};
controller.enqueue(
encoder.encode(`data: ${JSON.stringify(flushRes)}\n\n`)
);
pendingContent = "";
contentSent = true;
}
} catch (error: any) {
logger?.error(
`Error parsing ${providerName} stream chunk`,

View file

@ -9,6 +9,41 @@ import {
extractReasoningText,
cleanReasoningFields,
} from "./thinking";
import { normalizeToolParameters } from "./schema";
// Type definitions for Mistral API responses
/**
 * Shape of one chunk object as typed by this module for streamed Mistral
 * chat completions. The `object` field is fixed to the literal
 * "chat.completion.chunk".
 */
interface MistralStreamChunk {
id: string;
object: "chat.completion.chunk";
created: number;
model: string;
// One entry per choice; `delta` carries the fields present in this chunk.
choices: Array<{
index: number;
delta: {
role?: string;
content?: string | null;
// Two optional reasoning carriers are declared: a plain string and a
// structured object with optional content/signature.
// NOTE(review): which one a given response uses is not visible here.
reasoning_content?: string;
thinking?: { content?: string; signature?: string };
tool_calls?: Array<{
index: number;
id: string;
// `arguments` is typed as a string, not a parsed object.
function: { name: string; arguments: string };
}>;
};
// null is allowed; presumably set only on the last chunk of a choice — TODO confirm.
finish_reason: string | null;
}>;
// Optional token accounting for the request.
usage?: {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
};
}
/**
 * One content part of a Mistral message, tagged by `type` as either
 * "thinking" or "text". Both payload fields are optional at the type level.
 */
interface MistralMessageContent {
type: "thinking" | "text";
text?: string;
// Untyped payload; NOTE(review): its concrete structure is not visible here.
thinking?: any;
}
/**
* Helper to flatten array content to strings and remove cache_control
@ -111,17 +146,15 @@ export function buildRequestBody(request: UnifiedChatRequest): Record<string, an
req.tool_choice = transformToolChoice(req.tool_choice);
}
// 4. Tool Cleanup
// 4. Tool Cleanup - normalize schemas and remove $schema
if (Array.isArray(req.tools)) {
req.tools = req.tools.map((tool) => {
if (tool?.function?.parameters?.$schema) {
const params = { ...tool.function.parameters };
delete params.$schema;
if (tool?.function?.parameters) {
return {
...tool,
function: {
...tool.function,
parameters: params,
parameters: normalizeToolParameters(tool.function.parameters),
},
};
}

View file

@ -0,0 +1,176 @@
/**
 * Whitelist of schema keywords that survive sanitization. Any other key on
 * a schema object is dropped so that providers such as Gemini and Mistral
 * do not reject the request.
 */
const VALID_SCHEMA_FIELDS = new Set<string>([
  // Core / annotations
  "type",
  "title",
  "description",
  "default",
  "example",
  "nullable",
  "enum",
  "format",
  // Strings
  "minLength",
  "maxLength",
  "pattern",
  // Numbers
  "minimum",
  "maximum",
  // Arrays
  "items",
  "minItems",
  "maxItems",
  // Objects
  "properties",
  "required",
  "additionalProperties",
  "minProperties",
  "maxProperties",
  "propertyOrdering",
  // Composition
  "anyOf",
  "allOf",
  "oneOf",
]);
/**
 * Returns a copy of `schema` in which string schemas declared with
 * `format: "uri"` have lost their `format` key. Some providers
 * (Gemini, Mistral) reject schemas with this format.
 *
 * Non-object values are returned unchanged. A matching string schema is
 * shallow-copied without `format` and is not descended into any further.
 * Arrays and all other objects are rebuilt with every value processed
 * recursively; the input is never mutated.
 */
function removeUriFormat(schema: any): any {
  if (!schema || typeof schema !== "object") return schema;

  if (schema.type === "string" && schema.format === "uri") {
    const withoutFormat = { ...schema };
    delete withoutFormat.format;
    return withoutFormat;
  }

  if (Array.isArray(schema)) {
    return schema.map((entry: any) => removeUriFormat(entry));
  }

  const cleaned: any = {};
  for (const key in schema) {
    const value = schema[key];

    if (key !== "properties") {
      // items, anyOf/allOf/oneOf and every other key all go through the
      // same recursive call (arrays are mapped element by element).
      cleaned[key] = removeUriFormat(value);
      continue;
    }

    // `properties` is walked entry by entry so that the map itself is never
    // tested against the string/uri rule above.
    const props: any = {};
    for (const propName in value) {
      props[propName] = removeUriFormat(value[propName]);
    }
    cleaned[key] = props;
  }
  return cleaned;
}
/**
 * Recursively sanitizes a JSON schema and returns a new object (the input
 * is not mutated). For every schema object it:
 * 1. Strips fields that are not in VALID_SCHEMA_FIELDS
 * 2. Removes `enum` when `type` is set to something other than "string"
 * 3. Removes `format` from string types (except "enum" and "date-time")
 * 4. Removes `format: "uri"` from any type
 * 5. Removes `$schema`
 *
 * Nested schemas are sanitized too: each value of `properties`, `items`,
 * every member of `anyOf` / `allOf` / `oneOf`, and an object-valued
 * `additionalProperties`. A boolean `additionalProperties` is kept as is.
 * Data-carrying keywords (`enum`, `default`, `example`, `required`, ...)
 * are copied through untouched.
 *
 * This is a superset of normalizeJsonSchema that also handles
 * field whitelisting and enum/format cleanup needed by providers
 * like Gemini and Mistral.
 *
 * @param schema    Schema (or array of schemas) to sanitize. Non-object
 *                  values are returned unchanged.
 * @param parentKey Key under which `schema` sits in its parent. When it is
 *                  "properties" the whitelist in step 1 is skipped for this
 *                  object's own keys; steps 2-5 still apply.
 * @returns The sanitized copy.
 */
export function sanitizeJsonSchema(
  schema: any,
  parentKey?: string
): any {
  if (!schema || typeof schema !== "object") return schema;

  if (Array.isArray(schema)) {
    return schema.map((item: any) => sanitizeJsonSchema(item, parentKey));
  }

  const result: any = {};

  for (const key in schema) {
    // Step 1: strip unknown fields (skipped when the caller says this
    // object lives directly under "properties").
    if (parentKey !== "properties" && !VALID_SCHEMA_FIELDS.has(key)) {
      continue;
    }

    const value = schema[key];

    // Step 5: $schema is never forwarded.
    if (key === "$schema") {
      continue;
    }

    // Step 2: enum is only kept on string (or untyped) schemas.
    if (key === "enum" && schema.type && schema.type !== "string") {
      continue;
    }

    if (key === "format") {
      // Step 4: "uri" is rejected regardless of the declared type.
      if (value === "uri") {
        continue;
      }
      // Step 3: string schemas keep only the "enum" / "date-time" formats.
      if (
        schema.type === "string" &&
        value &&
        !["enum", "date-time"].includes(value)
      ) {
        continue;
      }
    }

    // Recurse into nested structures.
    if (key === "properties" && typeof value === "object" && !Array.isArray(value)) {
      // Property names are caller-defined, so iterate the map directly and
      // sanitize each property schema with the full whitelist.
      const props: any = {};
      for (const propKey in value) {
        props[propKey] = sanitizeJsonSchema(value[propKey]);
      }
      result[key] = props;
    } else if (key === "items" || key === "additionalProperties") {
      // additionalProperties may be a boolean (returned unchanged by the
      // guard at the top) or a schema that needs the same cleanup as any
      // other nested schema.
      result[key] = sanitizeJsonSchema(value, key);
    } else if (
      ["anyOf", "allOf", "oneOf"].includes(key) &&
      Array.isArray(value)
    ) {
      result[key] = value.map((item: any) => sanitizeJsonSchema(item, key));
    } else {
      result[key] = value;
    }
  }

  return result;
}
/**
 * Light-weight schema normalization: drops `format: "uri"` from string
 * schemas (via removeUriFormat) and removes a truthy top-level `$schema`
 * field from the resulting copy.
 *
 * For full sanitization (field whitelist, enum/format cleanup), use
 * sanitizeJsonSchema instead.
 */
export function normalizeJsonSchema(schema: any): any {
  const isObjectLike = schema !== null && typeof schema === "object";
  if (!isObjectLike) return schema;

  const cleaned = removeUriFormat(schema);
  if (cleaned.$schema) {
    delete cleaned.$schema;
  }
  return cleaned;
}
/**
 * Cleans a tool's `parameters` schema for providers with strict schema
 * validation. Object values are passed through sanitizeJsonSchema; anything
 * else (null, undefined, primitives) is returned unchanged.
 */
export function normalizeToolParameters(parameters: any): any {
  const isSchemaObject = parameters !== null && typeof parameters === "object";
  return isSchemaObject ? sanitizeJsonSchema(parameters) : parameters;
}