Fix Gemini/Gemma streaming, autocompact, and tool schema sanitization

- gemini.util.ts: flush pendingContent on stream end, fixing lost text for Gemma models that emit thinking but no thoughtSignature (e.g. gemma-4-31b-it); map Gemini 400 context-too-long errors to finish_reason "model_context_window_exceeded" to trigger Claude Code autocompact; remove temporary debug logs
- anthropic.transformer.ts: pass through model_context_window_exceeded as stop_reason in both the streaming and non-streaming paths
- schema.ts (new): sanitizeJsonSchema strips unknown fields via a whitelist and removes invalid format/enum/uri combinations; fixes a bug where property schemas bypassed the whitelist; normalizeToolParameters wraps it for tools
- groq.transformer.ts, mistral.util.ts: use normalizeToolParameters for full schema sanitization instead of only deleting $schema
- gemini.transformer.ts: resolve the model name from context before building the URL
- vertex-gemini.transformer.ts: fall back to provider.model when request.model is empty
2026-05-29 19:10:19 +00:00 · 2026-04-25 23:35:20 +02:00 · 2026-04-25 23:35:20 +02:00 · 41b74d65a0
commit 41b74d65a0
parent 3bb5323444
7 changed files with 454 additions and 121 deletions
--- a/packages/core/src/transformer/anthropic.transformer.ts
+++ b/packages/core/src/transformer/anthropic.transformer.ts
@ -884,6 +884,8 @@ export class AnthropicTransformer implements Transformer {
                      length: "max_tokens",
                      tool_calls: "tool_use",
                      content_filter: "stop_sequence",
+                      model_context_window_exceeded:
+                        "model_context_window_exceeded",
                    };

                    const anthropicStopReason =
@ -1039,6 +1041,8 @@ export class AnthropicTransformer implements Transformer {
            ? "tool_use"
            : choice.finish_reason === "content_filter"
            ? "stop_sequence"
+            : choice.finish_reason === "model_context_window_exceeded"
+            ? "model_context_window_exceeded"
            : "end_turn",
        stop_sequence: null,
        usage: {
--- a/packages/core/src/transformer/gemini.transformer.ts
+++ b/packages/core/src/transformer/gemini.transformer.ts
@ -13,14 +13,15 @@ export class GeminiTransformer implements Transformer {

  async transformRequestIn(
    request: UnifiedChatRequest,
-    provider: LLMProvider
+    provider: LLMProvider,
+    context: any
  ): Promise<Record<string, any>> {
+    const model = context?.req?.model || request.model || provider.model || "";
    return {
      body: buildRequestBody(request),
      config: {
        url: new URL(
-          `./${request.model}:${
-            request.stream ? "streamGenerateContent?alt=sse" : "generateContent"
+          `./${model}:${request.stream ? "streamGenerateContent?alt=sse" : "generateContent"
          }`,
          provider.baseUrl
        ),
--- a/packages/core/src/transformer/groq.transformer.ts
+++ b/packages/core/src/transformer/groq.transformer.ts
@ -2,6 +2,7 @@ import { UnifiedChatRequest } from "@/types/llm";
 import { Transformer } from "../types/transformer";
 import { createSSEStreamReader, StreamContext, encodeSSEData, encodeSSELine } from "../utils/stream";
 import { stripMessagesCacheControl } from "../utils/cacheControl";
+import { normalizeToolParameters } from "../utils/schema";
 import { v4 as uuidv4 } from "uuid";

 export class GroqTransformer implements Transformer {
@ -12,7 +13,9 @@ export class GroqTransformer implements Transformer {

    if (Array.isArray(request.tools)) {
      request.tools.forEach(tool => {
-        delete tool.function.parameters.$schema;
+        if (tool?.function?.parameters) {
+          tool.function.parameters = normalizeToolParameters(tool.function.parameters);
+        }
      });
    }
    return request;
--- a/packages/core/src/transformer/vertex-gemini.transformer.ts
+++ b/packages/core/src/transformer/vertex-gemini.transformer.ts
@ -58,7 +58,7 @@ export class VertexGeminiTransformer implements Transformer {
      body: buildRequestBody(request),
      config: {
        url: new URL(
-          `./v1beta1/projects/${projectId}/locations/${location}/publishers/google/models/${request.model}:${request.stream ? "streamGenerateContent" : "generateContent"}`,
+          `./v1beta1/projects/${projectId}/locations/${location}/publishers/google/models/${request.model || provider.model || ""}:${request.stream ? "streamGenerateContent" : "generateContent"}`,
            provider.baseUrl.endsWith('/') ? provider.baseUrl : provider.baseUrl + '/' || `https://${location}-aiplatform.googleapis.com`
        ),
        headers: {
--- a/packages/core/src/utils/gemini.util.ts
+++ b/packages/core/src/utils/gemini.util.ts
@ -1,67 +1,6 @@
 import { UnifiedChatRequest, UnifiedMessage } from "../types/llm";
 import { Content, ContentListUnion, Part, ToolListUnion } from "@google/genai";
-
-export function cleanupParameters(obj: any, keyName?: string): void {
-  if (!obj || typeof obj !== "object") {
-    return;
-  }
-
-  if (Array.isArray(obj)) {
-    obj.forEach((item) => {
-      cleanupParameters(item);
-    });
-    return;
-  }
-
-  const validFields = new Set([
-    "type",
-    "format",
-    "title",
-    "description",
-    "nullable",
-    "enum",
-    "maxItems",
-    "minItems",
-    "properties",
-    "required",
-    "minProperties",
-    "maxProperties",
-    "minLength",
-    "maxLength",
-    "pattern",
-    "example",
-    "anyOf",
-    "propertyOrdering",
-    "default",
-    "items",
-    "minimum",
-    "maximum",
-  ]);
-
-  if (keyName !== "properties") {
-    Object.keys(obj).forEach((key) => {
-      if (!validFields.has(key)) {
-        delete obj[key];
-      }
-    });
-  }
-
-  if (obj.enum && obj.type !== "string") {
-    delete obj.enum;
-  }
-
-  if (
-    obj.type === "string" &&
-    obj.format &&
-    !["enum", "date-time"].includes(obj.format)
-  ) {
-    delete obj.format;
-  }
-
-  Object.keys(obj).forEach((key) => {
-    cleanupParameters(obj[key], key);
-  });
-}
+import { sanitizeJsonSchema } from "./schema";

 // Type enum equivalent in JavaScript
 const Type = {
@ -209,48 +148,47 @@ export function tTool(tool: any): any {
  if (tool.functionDeclarations) {
    for (const functionDeclaration of tool.functionDeclarations) {
      if (functionDeclaration.parameters) {
-        if (!Object.keys(functionDeclaration.parameters).includes("$schema")) {
-          functionDeclaration.parameters = processJsonSchema(
-            functionDeclaration.parameters
-          );
-        } else {
-          if (!functionDeclaration.parametersJsonSchema) {
-            functionDeclaration.parametersJsonSchema =
-              functionDeclaration.parameters;
-            delete functionDeclaration.parameters;
-          }
-        }
+        const sanitized = sanitizeJsonSchema(functionDeclaration.parameters);
+        functionDeclaration.parameters = processJsonSchema(sanitized);
      }
      if (functionDeclaration.response) {
-        if (!Object.keys(functionDeclaration.response).includes("$schema")) {
-          functionDeclaration.response = processJsonSchema(
-            functionDeclaration.response
-          );
-        } else {
-          if (!functionDeclaration.responseJsonSchema) {
-            functionDeclaration.responseJsonSchema =
-              functionDeclaration.response;
-            delete functionDeclaration.response;
-          }
-        }
+        const sanitized = sanitizeJsonSchema(functionDeclaration.response);
+        functionDeclaration.response = processJsonSchema(sanitized);
      }
    }
  }
  return tool;
 }

+/**
+ * Projects a tool definition onto the unified `{ name, description, parameters }` shape.
+ *
+ * A tool with a truthy `tool.function.name` is read as OpenAI-style, with all
+ * three values taken from `tool.function`. Anything else is read as
+ * Anthropic-style: `name` and `description` come from the tool itself and the
+ * schema comes from `input_schema`.
+ */
+function normalizeTool(tool: any): { name: string; description: string; parameters: any } {
+  const fn = tool.function;
+  const isOpenAiShape = Boolean(fn?.name);
+  if (!isOpenAiShape) {
+    const { name, description, input_schema: parameters } = tool;
+    return { name, description, parameters };
+  }
+  const { name, description, parameters } = fn;
+  return { name, description, parameters };
+}
+
+/**
+ * Coerces a tool name into the form this module sends to Gemini: only
+ * characters from [a-zA-Z0-9_.:-], a letter or underscore in first position,
+ * and at most 128 characters.
+ *
+ * Disallowed characters are replaced with "_". A name that does not start with
+ * a letter or underscore gets "_" prepended before truncation. An empty name
+ * becomes "unnamed_function".
+ */
+function sanitizeGeminiFunctionName(name: string): string {
+  if (!name) {
+    return "unnamed_function";
+  }
+  const replaced = name.replace(/[^a-zA-Z0-9_.:\-]/g, "_");
+  const startsValid = !/^[^a-zA-Z_]/.test(replaced);
+  const candidate = startsValid ? replaced : "_" + replaced;
+  return candidate.substring(0, 128);
+}
+
 export function buildRequestBody(
  request: UnifiedChatRequest
 ): Record<string, any> {
  const tools = [];
-  const functionDeclarations = request.tools
-    ?.filter((tool) => tool.function.name !== "web_search")
-    ?.map((tool) => {
-      return {
-        name: tool.function.name,
-        description: tool.function.description,
-        parametersJsonSchema: tool.function.parameters,
-      };
+  const requestTools = request.tools || [];
+  const functionDeclarations = requestTools
+    .filter((tool) => normalizeTool(tool).name !== "web_search")
+    .map((tool) => {
+      const { name, description, parameters } = normalizeTool(tool);
+      return { name: sanitizeGeminiFunctionName(name), description, parameters };
    });
  if (functionDeclarations?.length) {
    tools.push(
@ -259,8 +197,8 @@ export function buildRequestBody(
      })
    );
  }
-  const webSearch = request.tools?.find(
-    (tool) => tool.function.name === "web_search"
+  const webSearch = requestTools.find(
+    (tool) => normalizeTool(tool).name === "web_search"
  );
  if (webSearch) {
    tools.push({
@ -269,20 +207,75 @@ export function buildRequestBody(
  }

  const contents: any[] = [];
-  const toolResponses = request.messages.filter((item) => item.role === "tool");
-  request.messages
-    .filter((item) => item.role !== "tool")
-    .forEach((message: UnifiedMessage) => {
-      let role: "user" | "model";
-      if (message.role === "assistant") {
-        role = "model";
-      } else if (["user", "system"].includes(message.role)) {
-        role = "user";
-      } else {
-        role = "user"; // Default to user if role is not recognized
+  const rawMessages = request.messages || [];
+
+
+
+  // Collect system instructions from request.system and system role messages
+  const systemTexts: string[] = [];
+  const extractText = (content: any): void => {
+    if (typeof content === "string") {
+      if (content) systemTexts.push(content);
+    } else if (Array.isArray(content)) {
+      for (const part of content) {
+        if (part?.type === "text" && part.text) systemTexts.push(part.text);
+        else if (typeof part === "string" && part) systemTexts.push(part);
      }
-      const parts = [];
-      if (typeof message.content === "string") {
+    }
+  };
+  if (request.system) extractText(request.system);
+  for (const msg of rawMessages) {
+    if (msg.role === "system") extractText(msg.content);
+  }
+
+  const messages: UnifiedMessage[] = [];
+
+  for (const msg of rawMessages) {
+    if (msg.role === "tool" || msg.role === "system") continue;
+
+    const role = msg.role === "assistant" ? "assistant" : "user";
+    const lastMsg = messages[messages.length - 1];
+
+    if (lastMsg && lastMsg.role === role) {
+      const lastContent = lastMsg.content;
+      const currentContent = msg.content;
+
+      if (typeof lastContent === "string" && typeof currentContent === "string") {
+        lastMsg.content = lastContent + "\n" + currentContent;
+      } else if (Array.isArray(lastContent) && Array.isArray(currentContent)) {
+        lastMsg.content = [...lastContent, ...currentContent];
+      } else if (typeof lastContent === "string" && Array.isArray(currentContent)) {
+        lastMsg.content = [
+          { type: "text", text: lastContent },
+          ...currentContent,
+        ];
+      } else if (Array.isArray(lastContent) && typeof currentContent === "string") {
+        lastMsg.content = [
+          ...lastContent,
+          { type: "text", text: currentContent },
+        ];
+      } else {
+        // Fallback: just push as a new message and hope for the best,
+        // or force a role change. For Gemini, we MUST alternate.
+        // To force alternation, we can insert a dummy model message if needed,
+        // but merging is preferred.
+        messages.push({ ...msg, role });
+      }
+    } else {
+      messages.push({ ...msg, role });
+    }
+  }
+
+  const toolResponses = rawMessages.filter((item) => item.role === "tool");
+  messages.forEach((message: UnifiedMessage) => {
+    let role: "user" | "model";
+    if (message.role === "assistant") {
+      role = "model";
+    } else {
+      role = "user";
+    }
+    const parts = [];
+    if (typeof message.content === "string") {
        const part: any = {
          text: message.content,
        };
@ -317,7 +310,8 @@ export function buildRequestBody(
                };
              }
            }
-          })
+            return null;
+          }).filter(Boolean)
        );
      } else if (message.content && typeof message.content === "object") {
        // Object like { text: "..." }
@ -410,11 +404,16 @@ export function buildRequestBody(
    }
  }

-  const body = {
-    contents,
+  const body: Record<string, any> = {
+    contents: contents.length ? contents : [{ role: "user", parts: [{ text: "" }] }],
    tools: tools.length ? tools : undefined,
    generationConfig,
  };
+  if (systemTexts.length) {
+    body.systemInstruction = {
+      parts: [{ text: systemTexts.join("\n\n") }],
+    };
+  }

  if (request.tool_choice) {
    const toolConfig = {
@ -522,6 +521,54 @@ export async function transformResponseOut(
    const jsonResponse: any = await response.json();
    logger?.debug({ response: jsonResponse }, `${providerName} response:`);

+    if (response.status >= 400) {
+      const errorMessage: string = jsonResponse.error?.message || "";
+      const lowerMessage = errorMessage.toLowerCase();
+      const isContextExceeded = [
+        "user input too long",
+        "input too long",
+        "prompt is too long",
+        "exceeds the token limit",
+        "request payload size exceeds",
+        "context_length_exceeded",
+      ].some((phrase) => lowerMessage.includes(phrase));
+
+      if (isContextExceeded) {
+        const res = {
+          id: `ctxexceeded_${Date.now()}`,
+          choices: [
+            {
+              finish_reason: "model_context_window_exceeded",
+              index: 0,
+              message: { content: "", role: "assistant" },
+            },
+          ],
+          created: Math.floor(Date.now() / 1000),
+          model: "",
+          object: "chat.completion",
+          usage: { completion_tokens: 0, prompt_tokens: 0, total_tokens: 0 },
+        };
+        return new Response(JSON.stringify(res), {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+        });
+      }
+
+      return new Response(JSON.stringify(jsonResponse), {
+        status: response.status,
+        statusText: response.statusText,
+        headers: response.headers,
+      });
+    }
+
+    if (!jsonResponse.candidates || jsonResponse.candidates.length === 0) {
+      return new Response(JSON.stringify(jsonResponse), {
+        status: response.status,
+        statusText: response.statusText,
+        headers: response.headers,
+      });
+    }
+
    // Extract thinking content from parts with thought: true
    let thinkingContent = "";
    let thinkingSignature = "";
@ -568,7 +615,7 @@ export async function transformResponseOut(
        {
          finish_reason:
            (
-              jsonResponse.candidates[0].finishReason as string
+              jsonResponse.candidates[0]?.finishReason as string
            )?.toLowerCase() || null,
          index: 0,
          message: {
@ -995,6 +1042,75 @@ export async function transformResponseOut(
                    contentSent = true;
                  }
                }
+
+                // Flush buffered text on stream end (e.g. Gemma models with thinking but no thoughtSignature)
+                if (candidate.finishReason && pendingContent) {
+                  if (!signatureSent && hasThinkingContent) {
+                    const signatureChunk = {
+                      choices: [
+                        {
+                          delta: {
+                            role: "assistant",
+                            content: null,
+                            thinking: { signature: `ccr_${+new Date()}` },
+                          },
+                          finish_reason: null,
+                          index: contentIndex,
+                          logprobs: null,
+                        },
+                      ],
+                      created: parseInt(new Date().getTime() / 1000 + "", 10),
+                      id: chunk.responseId || "",
+                      model: chunk.modelVersion || "",
+                      object: "chat.completion.chunk",
+                      system_fingerprint: "fp_a49d71b8a1",
+                    };
+                    controller.enqueue(
+                      encoder.encode(
+                        `data: ${JSON.stringify(signatureChunk)}\n\n`
+                      )
+                    );
+                    signatureSent = true;
+                    contentIndex++;
+                  }
+                  const flushRes = {
+                    choices: [
+                      {
+                        delta: {
+                          role: "assistant",
+                          content: pendingContent,
+                        },
+                        finish_reason: candidate.finishReason.toLowerCase(),
+                        index: contentIndex,
+                        logprobs: null,
+                      },
+                    ],
+                    created: parseInt(new Date().getTime() / 1000 + "", 10),
+                    id: chunk.responseId || "",
+                    model: chunk.modelVersion || "",
+                    object: "chat.completion.chunk",
+                    system_fingerprint: "fp_a49d71b8a1",
+                    usage: {
+                      completion_tokens:
+                        chunk.usageMetadata?.candidatesTokenCount || 0,
+                      prompt_tokens: chunk.usageMetadata?.promptTokenCount || 0,
+                      prompt_tokens_details: {
+                        cached_tokens:
+                          chunk.usageMetadata?.cachedContentTokenCount || 0,
+                      },
+                      total_tokens: chunk.usageMetadata?.totalTokenCount || 0,
+                      output_tokens_details: {
+                        reasoning_tokens:
+                          chunk.usageMetadata?.thoughtsTokenCount || 0,
+                      },
+                    },
+                  };
+                  controller.enqueue(
+                    encoder.encode(`data: ${JSON.stringify(flushRes)}\n\n`)
+                  );
+                  pendingContent = "";
+                  contentSent = true;
+                }
              } catch (error: any) {
                logger?.error(
                  `Error parsing ${providerName} stream chunk`,
--- a/packages/core/src/utils/mistral.util.ts
+++ b/packages/core/src/utils/mistral.util.ts
@ -9,6 +9,41 @@ import {
  extractReasoningText,
  cleanReasoningFields,
 } from "./thinking";
+import { normalizeToolParameters } from "./schema";
+
+// Type definitions for Mistral API responses
+
+/** Shape of one streamed chat-completion chunk. */
+interface MistralStreamChunk {
+  id: string;
+  /** Always the literal "chat.completion.chunk". */
+  object: "chat.completion.chunk";
+  created: number;
+  model: string;
+  /** Per-choice incremental updates carried by this chunk. */
+  choices: {
+    index: number;
+    /** Incremental payload; every field is optional. */
+    delta: {
+      role?: string;
+      content?: string | null;
+      reasoning_content?: string;
+      thinking?: { content?: string; signature?: string };
+      tool_calls?: {
+        index: number;
+        id: string;
+        function: { name: string; arguments: string };
+      }[];
+    };
+    finish_reason: string | null;
+  }[];
+  /** Token accounting; optional on the chunk. */
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
+/** One entry of array-form message content: a "thinking" part or a "text" part. */
+interface MistralMessageContent {
+  /** Text payload; presumably set when `type` is "text" (confirm against callers). */
+  text?: string;
+  /** Reasoning payload; left untyped (`any`) here. */
+  thinking?: any;
+  /** Discriminator for the part. */
+  type: "thinking" | "text";
+}

 /**
 * Helper to flatten array content to strings and remove cache_control
@ -111,17 +146,15 @@ export function buildRequestBody(request: UnifiedChatRequest): Record<string, an
    req.tool_choice = transformToolChoice(req.tool_choice);
  }

-  // 4. Tool Cleanup
+  // 4. Tool Cleanup - normalize schemas and remove $schema
  if (Array.isArray(req.tools)) {
    req.tools = req.tools.map((tool) => {
-      if (tool?.function?.parameters?.$schema) {
-        const params = { ...tool.function.parameters };
-        delete params.$schema;
+      if (tool?.function?.parameters) {
        return {
          ...tool,
          function: {
            ...tool.function,
-            parameters: params,
+            parameters: normalizeToolParameters(tool.function.parameters),
          },
        };
      }
--- a/packages/core/src/utils/schema.ts
+++ b/packages/core/src/utils/schema.ts
@ -0,0 +1,176 @@
+/**
+ * Whitelist of schema keywords that sanitizeJsonSchema keeps. This is a subset
+ * of JSON Schema, not the whole vocabulary: any key on a schema node that is
+ * not listed here is dropped, to prevent API rejections from providers like
+ * Gemini and Mistral.
+ */
+const VALID_SCHEMA_FIELDS: ReadonlySet<string> = new Set<string>([
+  "type", "format", "title", "description", "nullable", "enum",
+  "maxItems", "minItems",
+  "properties", "required", "minProperties", "maxProperties",
+  "minLength", "maxLength", "pattern",
+  "example", "anyOf", "propertyOrdering", "default", "items",
+  "minimum", "maximum",
+  "additionalProperties", "allOf", "oneOf",
+]);
+
+/**
+ * Returns a copy of `schema` in which every `{ type: "string", format: "uri" }`
+ * node has lost its `format` key. Some providers (Gemini, Mistral) reject
+ * schemas with this format.
+ *
+ * Non-object values are returned unchanged and arrays are mapped element by
+ * element. For plain objects every value is processed recursively; the
+ * `properties` map is walked entry by entry so that the map itself is never
+ * mistaken for a schema node.
+ */
+function removeUriFormat(schema: any): any {
+  if (!schema || typeof schema !== "object") return schema;
+
+  if (schema.type === "string" && schema.format === "uri") {
+    const withoutFormat = { ...schema };
+    delete withoutFormat.format;
+    return withoutFormat;
+  }
+
+  if (Array.isArray(schema)) {
+    return schema.map((entry: any) => removeUriFormat(entry));
+  }
+
+  const cleaned: any = {};
+  for (const key in schema) {
+    const child = schema[key];
+    if (key !== "properties") {
+      // items, anyOf/allOf/oneOf and every other key recurse the same way.
+      cleaned[key] = removeUriFormat(child);
+      continue;
+    }
+    const props: any = {};
+    for (const propName in child) {
+      props[propName] = removeUriFormat(child[propName]);
+    }
+    cleaned[key] = props;
+  }
+  return cleaned;
+}
+
+/**
+ * Recursively sanitizes a JSON schema for providers like Gemini and Mistral.
+ * Returns a new object; the input is not mutated (values that are not
+ * recursed into, such as `required` or `enum` arrays, are reused by reference).
+ *
+ * Rules applied to every schema node:
+ * 1. Keys not in VALID_SCHEMA_FIELDS are dropped (this is what removes `$schema`).
+ * 2. `enum` is dropped when `type` is set to anything other than "string".
+ * 3. `format` is dropped from string types unless it is "enum" or "date-time".
+ * 4. `format: "uri"` is dropped regardless of type.
+ * 5. Nested schemas are sanitized too: each entry of `properties`, `items`,
+ *    array-valued `anyOf`/`allOf`/`oneOf`, and object-valued
+ *    `additionalProperties` (a boolean `additionalProperties` is kept as-is).
+ *
+ * @param schema    A schema node or an array of nodes. Anything that is not an
+ *                  object is returned unchanged.
+ * @param parentKey Key under which `schema` was found. When it is "properties"
+ *                  the whitelist check (rule 1) is skipped for this node, so
+ *                  arbitrary property names survive; `$schema` is still removed.
+ * @returns The sanitized copy.
+ */
+export function sanitizeJsonSchema(
+  schema: any,
+  parentKey?: string
+): any {
+  if (!schema || typeof schema !== "object") return schema;
+
+  if (Array.isArray(schema)) {
+    return schema.map((item: any) => sanitizeJsonSchema(item, parentKey));
+  }
+
+  const isStringType = schema.type === "string";
+  const result: any = {};
+
+  // Own keys only: inherited enumerable properties are not part of the schema.
+  for (const key of Object.keys(schema)) {
+    // Rule 1: whitelist (skipped when this node is a property-name map).
+    if (parentKey !== "properties" && !VALID_SCHEMA_FIELDS.has(key)) continue;
+
+    // `$schema` can only get this far when the whitelist was skipped.
+    if (key === "$schema") continue;
+
+    const value = schema[key];
+
+    // Rule 2: enum is only kept on string (or untyped) nodes.
+    if (key === "enum" && schema.type && !isStringType) continue;
+
+    // Rules 3 and 4: drop "uri" everywhere, and any other unsupported string format.
+    if (
+      key === "format" &&
+      value &&
+      (value === "uri" ||
+        (isStringType && !["enum", "date-time"].includes(value)))
+    ) {
+      continue;
+    }
+
+    // Rule 5: recurse into nested schemas.
+    if (key === "properties" && typeof value === "object" && !Array.isArray(value)) {
+      // Property names are arbitrary, so only the schemas under them are sanitized.
+      const props: any = {};
+      for (const [propName, propSchema] of Object.entries(value ?? {})) {
+        props[propName] = sanitizeJsonSchema(propSchema);
+      }
+      result[key] = props;
+    } else if (key === "items" || key === "additionalProperties") {
+      // `additionalProperties` may be a boolean (returned unchanged) or a schema.
+      result[key] = sanitizeJsonSchema(value, key);
+    } else if (
+      ["anyOf", "allOf", "oneOf"].includes(key) &&
+      Array.isArray(value)
+    ) {
+      result[key] = value.map((item: any) => sanitizeJsonSchema(item, key));
+    } else {
+      result[key] = value;
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Light-weight schema clean-up that removes only the pieces known to cause
+ * API rejections:
+ * - `format: "uri"` on string types (at any depth, via removeUriFormat)
+ * - a truthy top-level `$schema` field
+ *
+ * Non-object input is returned unchanged. For the full treatment (field
+ * whitelist, enum/format cleanup) use sanitizeJsonSchema instead.
+ */
+export function normalizeJsonSchema(schema: any): any {
+  const isSchemaObject = Boolean(schema) && typeof schema === "object";
+  if (!isSchemaObject) {
+    return schema;
+  }
+
+  // removeUriFormat returns a fresh object, so deleting from it is safe.
+  const cleaned = removeUriFormat(schema);
+  if (cleaned.$schema) {
+    delete cleaned.$schema;
+  }
+  return cleaned;
+}
+
+/**
+ * Cleans a tool's `parameters` schema by running it through
+ * sanitizeJsonSchema. Falsy or non-object values are handed back untouched.
+ */
+export function normalizeToolParameters(parameters: any): any {
+  const isSanitizable = parameters && typeof parameters === "object";
+  return isSanitizable ? sanitizeJsonSchema(parameters) : parameters;
+}