#!/usr/bin/env tsx import { writeFileSync } from "fs"; import { join, dirname } from "path"; import { fileURLToPath } from "url"; import { CLOUDFLARE_AI_GATEWAY_ANTHROPIC_BASE_URL, CLOUDFLARE_AI_GATEWAY_COMPAT_BASE_URL, CLOUDFLARE_AI_GATEWAY_OPENAI_BASE_URL, CLOUDFLARE_WORKERS_AI_BASE_URL, } from "../src/providers/cloudflare.js"; import { Api, type AnthropicMessagesCompat, KnownProvider, Model, type OpenAICompletionsCompat, } from "../src/types.js"; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); const packageRoot = join(__dirname, ".."); interface ModelsDevModel { id: string; name: string; tool_call?: boolean; reasoning?: boolean; limit?: { context?: number; output?: number; }; cost?: { input?: number; output?: number; cache_read?: number; cache_write?: number; }; modalities?: { input?: string[]; }; provider?: { npm?: string; }; } interface AiGatewayModel { id: string; name?: string; context_window?: number; max_tokens?: number; tags?: string[]; pricing?: { input?: string | number; output?: string | number; input_cache_read?: string | number; input_cache_write?: string | number; }; } const COPILOT_STATIC_HEADERS = { "User-Agent": "GitHubCopilotChat/0.35.0", "Editor-Version": "vscode/1.107.0", "Editor-Plugin-Version": "copilot-chat/0.35.0", "Copilot-Integration-Id": "vscode-chat", } as const; const KIMI_STATIC_HEADERS = { "User-Agent": "KimiCLI/1.5", } as const; const TOGETHER_BASE_URL = "https://api.together.ai/v1"; const TOGETHER_BASE_COMPAT: OpenAICompletionsCompat = { supportsStore: false, supportsDeveloperRole: false, supportsReasoningEffort: false, maxTokensField: "max_tokens", supportsStrictMode: false, supportsLongCacheRetention: false, }; const TOGETHER_TOGGLE_REASONING_COMPAT: OpenAICompletionsCompat = { ...TOGETHER_BASE_COMPAT, thinkingFormat: "together", }; const TOGETHER_REASONING_EFFORT_COMPAT: OpenAICompletionsCompat = { ...TOGETHER_BASE_COMPAT, supportsReasoningEffort: true, thinkingFormat: "openai", }; const TOGETHER_TOGGLE_REASONING_EFFORT_COMPAT: OpenAICompletionsCompat = { ...TOGETHER_TOGGLE_REASONING_COMPAT, supportsReasoningEffort: true, }; const TOGETHER_REASONING_ONLY_MODELS = new Set([ "deepseek-ai/DeepSeek-R1", "MiniMaxAI/MiniMax-M2.5", "MiniMaxAI/MiniMax-M2.7", ]); const TOGETHER_REASONING_EFFORT_MODELS = new Set(["openai/gpt-oss-20b", "openai/gpt-oss-120b"]); const TOGETHER_TOGGLE_REASONING_EFFORT_MODELS = new Set(["deepseek-ai/DeepSeek-V4-Pro"]); const TOGETHER_FIXED_REASONING_LEVEL_MAP = { off: null, minimal: null, low: null, medium: null, } as const; const TOGETHER_REASONING_EFFORT_LEVEL_MAP = { off: null, minimal: null, } as const; const TOGETHER_DEEPSEEK_V4_THINKING_LEVEL_MAP = { minimal: null, low: null, medium: null, high: "high", xhigh: null, } as const; const TOGETHER_TOGGLE_REASONING_LEVEL_MAP = { minimal: null, low: null, medium: null, } as const; const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1"; const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh"; const ZAI_TOOL_STREAM_UNSUPPORTED_MODELS = new Set(["glm-4.5", "glm-4.5-air", "glm-4.5-flash", "glm-4.5v"]); const EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS = new Set([ "github-copilot:claude-haiku-4.5", "github-copilot:claude-sonnet-4", "github-copilot:claude-sonnet-4.5", ]); const DEEPSEEK_V4_THINKING_LEVEL_MAP = { minimal: null, low: null, medium: null, high: "high", xhigh: "max", } as const; const OPENAI_RESPONSES_NONE_REASONING_MODELS = new Set([ "gpt-5.1", "gpt-5.2", "gpt-5.3-codex", "gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano", "gpt-5.5", ]); function mergeThinkingLevelMap(model: Model, map: NonNullable["thinkingLevelMap"]>): void { model.thinkingLevelMap = { ...model.thinkingLevelMap, ...map }; } function getTogetherCompat(modelId: string, reasoning: boolean): OpenAICompletionsCompat { if (!reasoning) return TOGETHER_BASE_COMPAT; if (TOGETHER_REASONING_EFFORT_MODELS.has(modelId)) return TOGETHER_REASONING_EFFORT_COMPAT; if (TOGETHER_TOGGLE_REASONING_EFFORT_MODELS.has(modelId)) return TOGETHER_TOGGLE_REASONING_EFFORT_COMPAT; if (TOGETHER_REASONING_ONLY_MODELS.has(modelId)) return TOGETHER_BASE_COMPAT; return TOGETHER_TOGGLE_REASONING_COMPAT; } function getTogetherThinkingLevelMap( modelId: string, reasoning: boolean, ): NonNullable["thinkingLevelMap"]> | undefined { if (!reasoning) return undefined; if (TOGETHER_REASONING_EFFORT_MODELS.has(modelId)) return { ...TOGETHER_REASONING_EFFORT_LEVEL_MAP }; if (TOGETHER_TOGGLE_REASONING_EFFORT_MODELS.has(modelId)) return { ...TOGETHER_DEEPSEEK_V4_THINKING_LEVEL_MAP }; if (TOGETHER_REASONING_ONLY_MODELS.has(modelId)) return { ...TOGETHER_FIXED_REASONING_LEVEL_MAP }; return { ...TOGETHER_TOGGLE_REASONING_LEVEL_MAP }; } function supportsOpenAiXhigh(modelId: string): boolean { return ( modelId.includes("gpt-5.2") || modelId.includes("gpt-5.3") || modelId.includes("gpt-5.4") || modelId.includes("gpt-5.5") ); } function isGoogleThinkingApi(model: Model): boolean { return model.api === "google-generative-ai" || model.api === "google-vertex"; } function isGemini3ProModel(modelId: string): boolean { return /gemini-3(?:\.\d+)?-pro/.test(modelId.toLowerCase()); } function isGemini3FlashModel(modelId: string): boolean { return /gemini-3(?:\.\d+)?-flash/.test(modelId.toLowerCase()); } function isGemma4Model(modelId: string): boolean { return /gemma-?4/.test(modelId.toLowerCase()); } function applyThinkingLevelMetadata(model: Model): void { if ( (model.api === "openai-responses" || model.api === "azure-openai-responses") && model.id.startsWith("gpt-5") ) { mergeThinkingLevelMap(model, { off: null }); } if (model.provider === "github-copilot" && model.id.startsWith("gpt-5")) { mergeThinkingLevelMap(model, { minimal: "low" }); } if ( model.api === "openai-responses" && model.provider === "openai" && OPENAI_RESPONSES_NONE_REASONING_MODELS.has(model.id) ) { mergeThinkingLevelMap(model, { off: "none" }); } if (supportsOpenAiXhigh(model.id)) { mergeThinkingLevelMap(model, { xhigh: "xhigh" }); } if (model.id.includes("opus-4-6") || model.id.includes("opus-4.6")) { mergeThinkingLevelMap(model, { xhigh: "max" }); } if (model.id.includes("opus-4-7") || model.id.includes("opus-4.7")) { mergeThinkingLevelMap(model, { xhigh: "xhigh" }); } if (model.api === "openai-completions" && model.id.includes("deepseek-v4")) { mergeThinkingLevelMap(model, DEEPSEEK_V4_THINKING_LEVEL_MAP); } if (isGoogleThinkingApi(model) && isGemini3ProModel(model.id)) { mergeThinkingLevelMap(model, { off: null, minimal: null, low: "LOW", medium: null, high: "HIGH" }); } if (isGoogleThinkingApi(model) && isGemini3FlashModel(model.id)) { mergeThinkingLevelMap(model, { off: null }); } if (isGoogleThinkingApi(model) && isGemma4Model(model.id)) { mergeThinkingLevelMap(model, { off: null, minimal: "MINIMAL", low: null, medium: null, high: "HIGH" }); } if (model.provider === "groq" && model.id === "qwen/qwen3-32b") { mergeThinkingLevelMap(model, { minimal: null, low: null, medium: null, high: "default" }); } if (model.provider === "openai-codex" && supportsOpenAiXhigh(model.id)) { mergeThinkingLevelMap(model, { minimal: "low" }); } if (model.provider === "openrouter" && model.id.startsWith("inception/mercury-2")) { // Mercury 2 in instant mode (reasoning_effort: "none") disables tool calling. // Mark "off" unsupported so the openai-completions provider omits the reasoning param // instead of defaulting to {reasoning:{effort:"none"}} (see openai-completions.ts:575). // Pi's low/medium/high pass through verbatim; OpenRouter normalizes to Mercury's vocabulary. mergeThinkingLevelMap(model, { off: null }); } } function getAnthropicMessagesCompat(provider: string, modelId: string): AnthropicMessagesCompat | undefined { return EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`) ? { supportsEagerToolInputStreaming: false } : undefined; } function getBedrockBaseUrl(modelId: string): string { return modelId.startsWith("eu.") ? "https://bedrock-runtime.eu-central-1.amazonaws.com" : "https://bedrock-runtime.us-east-1.amazonaws.com"; } async function fetchOpenRouterModels(): Promise[]> { try { console.log("Fetching models from OpenRouter API..."); const response = await fetch("https://openrouter.ai/api/v1/models"); const data = await response.json(); const models: Model[] = []; for (const model of data.data) { // Only include models that support tools if (!model.supported_parameters?.includes("tools")) continue; // Parse provider from model ID let provider: KnownProvider = "openrouter"; let modelKey = model.id; modelKey = model.id; // Keep full ID for OpenRouter // Parse input modalities const input: ("text" | "image")[] = ["text"]; if (model.architecture?.modality?.includes("image")) { input.push("image"); } // Convert pricing from $/token to $/million tokens const inputCost = parseFloat(model.pricing?.prompt || "0") * 1_000_000; const outputCost = parseFloat(model.pricing?.completion || "0") * 1_000_000; const cacheReadCost = parseFloat(model.pricing?.input_cache_read || "0") * 1_000_000; const cacheWriteCost = parseFloat(model.pricing?.input_cache_write || "0") * 1_000_000; const normalizedModel: Model = { id: modelKey, name: model.name, api: "openai-completions", baseUrl: "https://openrouter.ai/api/v1", provider, reasoning: model.supported_parameters?.includes("reasoning") || false, input, cost: { input: inputCost, output: outputCost, cacheRead: cacheReadCost, cacheWrite: cacheWriteCost, }, contextWindow: model.context_length || 4096, maxTokens: model.top_provider?.max_completion_tokens || 4096, }; models.push(normalizedModel); } console.log(`Fetched ${models.length} tool-capable models from OpenRouter`); return models; } catch (error) { console.error("Failed to fetch OpenRouter models:", error); return []; } } async function fetchAiGatewayModels(): Promise[]> { try { console.log("Fetching models from Vercel AI Gateway API..."); const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`); const data = await response.json(); const models: Model[] = []; const toNumber = (value: string | number | undefined): number => { if (typeof value === "number") { return Number.isFinite(value) ? value : 0; } const parsed = parseFloat(value ?? "0"); return Number.isFinite(parsed) ? parsed : 0; }; const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : []; for (const model of items) { const tags = Array.isArray(model.tags) ? model.tags : []; // Only include models that support tools if (!tags.includes("tool-use")) continue; const input: ("text" | "image")[] = ["text"]; if (tags.includes("vision")) { input.push("image"); } const inputCost = toNumber(model.pricing?.input) * 1_000_000; const outputCost = toNumber(model.pricing?.output) * 1_000_000; const cacheReadCost = toNumber(model.pricing?.input_cache_read) * 1_000_000; const cacheWriteCost = toNumber(model.pricing?.input_cache_write) * 1_000_000; models.push({ id: model.id, name: model.name || model.id, api: "anthropic-messages", baseUrl: AI_GATEWAY_BASE_URL, provider: "vercel-ai-gateway", reasoning: tags.includes("reasoning"), input, cost: { input: inputCost, output: outputCost, cacheRead: cacheReadCost, cacheWrite: cacheWriteCost, }, contextWindow: model.context_window || 4096, maxTokens: model.max_tokens || 4096, }); } console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`); return models; } catch (error) { console.error("Failed to fetch Vercel AI Gateway models:", error); return []; } } async function loadModelsDevData(): Promise[]> { try { console.log("Fetching models from models.dev API..."); const response = await fetch("https://models.dev/api.json"); const data = await response.json(); const models: Model[] = []; // Process Amazon Bedrock models if (data["amazon-bedrock"]?.models) { for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; let id = modelId; if (id.startsWith("ai21.jamba")) { // These models doesn't support tool use in streaming mode continue; } if (id.startsWith("mistral.mistral-7b-instruct-v0")) { // These models doesn't support system messages continue; } models.push({ id, name: m.name || id, api: "bedrock-converse-stream" as const, provider: "amazon-bedrock" as const, baseUrl: getBedrockBaseUrl(id), reasoning: m.reasoning === true, input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Anthropic models if (data.anthropic?.models) { for (const [modelId, model] of Object.entries(data.anthropic.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "anthropic-messages", provider: "anthropic", baseUrl: "https://api.anthropic.com", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Google models if (data.google?.models) { for (const [modelId, model] of Object.entries(data.google.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "google-generative-ai", provider: "google", baseUrl: "https://generativelanguage.googleapis.com/v1beta", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process OpenAI models if (data.openai?.models) { for (const [modelId, model] of Object.entries(data.openai.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-responses", provider: "openai", baseUrl: "https://api.openai.com/v1", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Groq models if (data.groq?.models) { for (const [modelId, model] of Object.entries(data.groq.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider: "groq", baseUrl: "https://api.groq.com/openai/v1", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Cerebras models if (data.cerebras?.models) { for (const [modelId, model] of Object.entries(data.cerebras.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider: "cerebras", baseUrl: "https://api.cerebras.ai/v1", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Cloudflare Workers AI models if (data["cloudflare-workers-ai"]?.models) { for (const [modelId, model] of Object.entries(data["cloudflare-workers-ai"].models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider: "cloudflare-workers-ai", baseUrl: CLOUDFLARE_WORKERS_AI_BASE_URL, reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, compat: { sendSessionAffinityHeaders: true }, }); } } // Process Cloudflare AI Gateway models if (data["cloudflare-ai-gateway"]?.models) { for (const [prefixedId, model] of Object.entries(data["cloudflare-ai-gateway"].models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; const slashIdx = prefixedId.indexOf("/"); if (slashIdx === -1) continue; const upstream = prefixedId.slice(0, slashIdx); const nativeId = prefixedId.slice(slashIdx + 1); let api: "anthropic-messages" | "openai-completions" | "openai-responses"; let baseUrl: string; let id: string; if (upstream === "openai") { api = "openai-responses"; baseUrl = CLOUDFLARE_AI_GATEWAY_OPENAI_BASE_URL; id = nativeId; } else if (upstream === "anthropic") { api = "anthropic-messages"; baseUrl = CLOUDFLARE_AI_GATEWAY_ANTHROPIC_BASE_URL; id = nativeId; } else if (upstream === "workers-ai") { api = "openai-completions"; baseUrl = CLOUDFLARE_AI_GATEWAY_COMPAT_BASE_URL; id = prefixedId; } else { continue; } // workers-ai/* through the gateway forwards x-session-affinity to // the underlying Workers AI runtime for prefix-cache routing. const compat = upstream === "workers-ai" ? { sendSessionAffinityHeaders: true } : undefined; models.push({ id, name: m.name || id, api, provider: "cloudflare-ai-gateway", baseUrl, reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, ...(compat ? { compat } : {}), }); } } // Process xAi models if (data.xai?.models) { for (const [modelId, model] of Object.entries(data.xai.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider: "xai", baseUrl: "https://api.x.ai/v1", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process zAi models if (data["zai-coding-plan"]?.models) { for (const [modelId, model] of Object.entries(data["zai-coding-plan"].models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; const supportsImage = m.modalities?.input?.includes("image"); models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider: "zai", baseUrl: "https://api.z.ai/api/coding/paas/v4", reasoning: m.reasoning === true, input: supportsImage ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, compat: { supportsDeveloperRole: false, thinkingFormat: "zai", ...(!ZAI_TOOL_STREAM_UNSUPPORTED_MODELS.has(modelId) ? { zaiToolStream: true } : {}), }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Mistral models if (data.mistral?.models) { for (const [modelId, model] of Object.entries(data.mistral.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Hugging Face models if (data.huggingface?.models) { for (const [modelId, model] of Object.entries(data.huggingface.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider: "huggingface", baseUrl: "https://router.huggingface.co/v1", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, compat: { supportsDeveloperRole: false, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Fireworks models if (data["fireworks-ai"]?.models) { for (const [modelId, model] of Object.entries(data["fireworks-ai"].models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "anthropic-messages", provider: "fireworks", // Fireworks Anthropic-compatible API - SDK appends /v1/messages baseUrl: "https://api.fireworks.ai/inference", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, // Fireworks prompt caching uses automatic prefix matching + session affinity. // x-session-affinity routes requests to the same replica for cache hits. // cache_control on tools and eager_input_streaming are not supported. // See: https://docs.fireworks.ai/tools-sdks/anthropic-compatibility compat: { sendSessionAffinityHeaders: true, supportsEagerToolInputStreaming: false, supportsCacheControlOnTools: false, supportsLongCacheRetention: false, }, }); } } // Process Together AI models const togetherProvider = data.together ?? data.togetherai ?? data["together-ai"]; if (togetherProvider?.models) { for (const [modelId, model] of Object.entries(togetherProvider.models)) { const m = model as ModelsDevModel & { status?: string }; if (m.tool_call !== true) continue; if (m.status === "deprecated") continue; const reasoning = m.reasoning === true; const thinkingLevelMap = getTogetherThinkingLevelMap(modelId, reasoning); models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider: "together", baseUrl: TOGETHER_BASE_URL, reasoning, ...(thinkingLevelMap ? { thinkingLevelMap } : {}), input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, compat: getTogetherCompat(modelId, reasoning), contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process OpenCode models (Zen and Go) // API mapping based on provider.npm field: // - @ai-sdk/openai → openai-responses // - @ai-sdk/anthropic → anthropic-messages // - @ai-sdk/google → google-generative-ai // - null/undefined/@ai-sdk/openai-compatible → openai-completions const opencodeVariants = [ { key: "opencode", provider: "opencode", basePath: "https://opencode.ai/zen" }, { key: "opencode-go", provider: "opencode-go", basePath: "https://opencode.ai/zen/go" }, ] as const; for (const variant of opencodeVariants) { if (!data[variant.key]?.models) continue; for (const [modelId, model] of Object.entries(data[variant.key].models)) { const m = model as ModelsDevModel & { status?: string }; if (m.tool_call !== true) continue; if (m.status === "deprecated") continue; const npm = m.provider?.npm; let api: Api; let baseUrl: string; let compat: OpenAICompletionsCompat | undefined; if (npm === "@ai-sdk/openai") { api = "openai-responses"; baseUrl = `${variant.basePath}/v1`; } else if (npm === "@ai-sdk/anthropic") { api = "anthropic-messages"; // Anthropic SDK appends /v1/messages to baseURL baseUrl = variant.basePath; } else if (npm === "@ai-sdk/google") { api = "google-generative-ai"; baseUrl = `${variant.basePath}/v1`; } else if (npm === "@ai-sdk/alibaba") { api = "openai-completions"; baseUrl = `${variant.basePath}/v1`; compat = { cacheControlFormat: "anthropic" }; } else { // null, undefined, or @ai-sdk/openai-compatible api = "openai-completions"; baseUrl = `${variant.basePath}/v1`; } // Fix known mismatches between models.dev npm data and actual // OpenCode Go endpoint behaviour. models.dev reports these models // as @ai-sdk/anthropic, but the OpenCode Go endpoints either don't // accept Anthropic SDK auth (MiniMax M2.7) or are served through // the OpenAI-compatible /v1/chat/completions path (Qwen 3.5/3.6). // Switch them to openai-completions so requests use Bearer auth // and the standard /v1/chat/completions endpoint. if (variant.provider === "opencode-go") { if (modelId === "minimax-m2.7") { api = "openai-completions"; baseUrl = `${variant.basePath}/v1`; } if (modelId === "qwen3.5-plus" || modelId === "qwen3.6-plus") { api = "openai-completions"; baseUrl = `${variant.basePath}/v1`; // Qwen/DashScope uses enable_thinking at the top level. compat = { ...(compat ?? {}), thinkingFormat: "qwen" }; } } models.push({ id: modelId, name: m.name || modelId, api, provider: variant.provider, baseUrl, reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, ...(compat ? { compat } : {}), contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process GitHub Copilot models if (data["github-copilot"]?.models) { for (const [modelId, model] of Object.entries(data["github-copilot"].models)) { const m = model as ModelsDevModel & { status?: string }; if (m.tool_call !== true) continue; if (m.status === "deprecated") continue; // Claude 4.x models route to Anthropic Messages API const isCopilotClaude4 = /^claude-(haiku|sonnet|opus)-4([.\-]|$)/.test(modelId); // gpt-5 models require responses API, others use completions const needsResponsesApi = modelId.startsWith("gpt-5") || modelId.startsWith("oswe"); const api: Api = isCopilotClaude4 ? "anthropic-messages" : needsResponsesApi ? "openai-responses" : "openai-completions"; const anthropicCompat = api === "anthropic-messages" ? getAnthropicMessagesCompat("github-copilot", modelId) : undefined; const copilotModel: Model = { id: modelId, name: m.name || modelId, api, provider: "github-copilot", baseUrl: "https://api.individual.githubcopilot.com", reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 128000, maxTokens: m.limit?.output || 8192, headers: { ...COPILOT_STATIC_HEADERS }, ...(anthropicCompat ? { compat: anthropicCompat } : {}), // compat only applies to openai-completions ...(api === "openai-completions" ? { compat: { supportsStore: false, supportsDeveloperRole: false, supportsReasoningEffort: false, }, } : {}), }; models.push(copilotModel); } } // Process MiniMax models const minimaxVariants = [ { key: "minimax", provider: "minimax", baseUrl: "https://api.minimax.io/anthropic" }, { key: "minimax-cn", provider: "minimax-cn", baseUrl: "https://api.minimaxi.com/anthropic" }, ] as const; for (const { key, provider, baseUrl } of minimaxVariants) { if (data[key]?.models) { for (const [modelId, model] of Object.entries(data[key].models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "anthropic-messages", provider, // MiniMax's Anthropic-compatible API - SDK appends /v1/messages baseUrl, reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } } // Process Kimi For Coding models if (data["kimi-for-coding"]?.models) { const kimiModels = data["kimi-for-coding"].models as Record; const hasCanonicalModel = Object.prototype.hasOwnProperty.call(kimiModels, "kimi-for-coding"); const kimiAliases = new Set(["k2p5", "k2p6"]); for (const [modelId, model] of Object.entries(kimiModels)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; // models.dev may expose versioned aliases (e.g. k2p5/k2p6). // Normalize aliases to the canonical model id and drop duplicates when canonical exists. if (kimiAliases.has(modelId) && hasCanonicalModel) continue; const normalizedId = kimiAliases.has(modelId) ? "kimi-for-coding" : modelId; const normalizedName = kimiAliases.has(modelId) ? "Kimi For Coding" : m.name || normalizedId; models.push({ id: normalizedId, name: normalizedName, api: "anthropic-messages", provider: "kimi-coding", // Kimi For Coding's Anthropic-compatible API - SDK appends /v1/messages baseUrl: "https://api.kimi.com/coding", headers: { ...KIMI_STATIC_HEADERS }, reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } // Process Moonshot AI models const moonshotVariants = [ { key: "moonshotai", provider: "moonshotai", baseUrl: "https://api.moonshot.ai/v1" }, { key: "moonshotai-cn", provider: "moonshotai-cn", baseUrl: "https://api.moonshot.cn/v1" }, ] as const; const moonshotCompat: OpenAICompletionsCompat = { supportsStore: false, supportsDeveloperRole: false, supportsReasoningEffort: false, maxTokensField: "max_tokens", supportsStrictMode: false, }; for (const { key, provider, baseUrl } of moonshotVariants) { if (!data[key]?.models) continue; for (const [modelId, model] of Object.entries(data[key].models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider, baseUrl, reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, compat: moonshotCompat, }); } } // Process Xiaomi MiMo models // Built-in `xiaomi` targets the API billing endpoint (single stable URL, // keys from platform.xiaomimimo.com). The three `xiaomi-token-plan-*` // providers cover prepaid Token Plan endpoints in cn / ams / sgp. const xiaomiCompat: OpenAICompletionsCompat = { requiresReasoningContentOnAssistantMessages: true, thinkingFormat: "deepseek", }; const xiaomiVariants = [ { provider: "xiaomi", baseUrl: "https://api.xiaomimimo.com/v1" }, { provider: "xiaomi-token-plan-cn", baseUrl: "https://token-plan-cn.xiaomimimo.com/v1" }, { provider: "xiaomi-token-plan-ams", baseUrl: "https://token-plan-ams.xiaomimimo.com/v1" }, { provider: "xiaomi-token-plan-sgp", baseUrl: "https://token-plan-sgp.xiaomimimo.com/v1" }, ] as const; if (data.xiaomi?.models) { for (const { provider, baseUrl } of xiaomiVariants) { for (const [modelId, model] of Object.entries(data.xiaomi.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; models.push({ id: modelId, name: m.name || modelId, api: "openai-completions", provider, baseUrl, compat: xiaomiCompat, reasoning: m.reasoning === true, input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); } } } console.log(`Loaded ${models.length} tool-capable models from models.dev`); return models; } catch (error) { console.error("Failed to load models.dev data:", error); return []; } } async function generateModels() { // Fetch models from both sources // models.dev: Anthropic, Google, OpenAI, Groq, Cerebras // OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI) // AI Gateway: OpenAI-compatible catalog with tool-capable models const modelsDevModels = await loadModelsDevData(); const openRouterModels = await fetchOpenRouterModels(); const aiGatewayModels = await fetchAiGatewayModels(); // Combine models (models.dev has priority) const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels].filter( (model) => !((model.provider === "opencode" || model.provider === "opencode-go") && model.id === "gpt-5.3-codex-spark"), ); // Fix incorrect cache pricing for Claude Opus 4.5 from models.dev // models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25) const opus45 = allModels.find(m => m.provider === "anthropic" && m.id === "claude-opus-4-5"); if (opus45) { opus45.cost.cacheRead = 0.5; opus45.cost.cacheWrite = 6.25; } // Temporary overrides until upstream model metadata is corrected. for (const candidate of allModels) { if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-opus-4-6-v1")) { candidate.cost.cacheRead = 0.5; candidate.cost.cacheWrite = 6.25; } if ( (candidate.provider === "anthropic" || candidate.provider === "opencode" || candidate.provider === "opencode-go" || candidate.provider === "github-copilot") && (candidate.id === "claude-opus-4-6" || candidate.id === "claude-sonnet-4-6" || candidate.id === "claude-opus-4.6" || candidate.id === "claude-sonnet-4.6") ) { candidate.contextWindow = 1000000; } // OpenCode variants list Claude Sonnet 4/4.5 with 1M context, actual limit is 200K if ( (candidate.provider === "opencode" || candidate.provider === "opencode-go") && (candidate.id === "claude-sonnet-4-5" || candidate.id === "claude-sonnet-4") ) { candidate.contextWindow = 200000; } if ((candidate.provider === "opencode" || candidate.provider === "opencode-go") && candidate.id === "gpt-5.4") { candidate.contextWindow = 272000; candidate.maxTokens = 128000; } if (candidate.provider === "openai" && (candidate.id === "gpt-5.4" || candidate.id === "gpt-5.5")) { candidate.contextWindow = 272000; candidate.maxTokens = 128000; } // Keep selected OpenRouter model metadata stable until upstream settles. if (candidate.provider === "openrouter" && candidate.id === "moonshotai/kimi-k2.5") { candidate.cost.input = 0.41; candidate.cost.output = 2.06; candidate.cost.cacheRead = 0.07; candidate.maxTokens = 4096; } if (candidate.provider === "openrouter" && candidate.id === "z-ai/glm-5") { candidate.cost.input = 0.6; candidate.cost.output = 1.9; candidate.cost.cacheRead = 0.119; } } // Add missing EU Opus 4.6 profile if (!allModels.some((m) => m.provider === "amazon-bedrock" && m.id === "eu.anthropic.claude-opus-4-6-v1")) { allModels.push({ id: "eu.anthropic.claude-opus-4-6-v1", name: "Claude Opus 4.6 (EU)", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: getBedrockBaseUrl("eu.anthropic.claude-opus-4-6-v1"), reasoning: true, input: ["text", "image"], cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25, }, contextWindow: 200000, maxTokens: 128000, }); } // Add missing Claude Opus 4.6 if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-6")) { allModels.push({ id: "claude-opus-4-6", name: "Claude Opus 4.6", api: "anthropic-messages", baseUrl: "https://api.anthropic.com", provider: "anthropic", reasoning: true, input: ["text", "image"], cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25, }, contextWindow: 1000000, maxTokens: 128000, }); } // Add missing Claude Opus 4.7 if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-7")) { allModels.push({ id: "claude-opus-4-7", name: "Claude Opus 4.7", api: "anthropic-messages", baseUrl: "https://api.anthropic.com", provider: "anthropic", reasoning: true, input: ["text", "image"], cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25, }, contextWindow: 1000000, maxTokens: 128000, }); } // Add missing Claude Sonnet 4.6 if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-sonnet-4-6")) { allModels.push({ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", api: "anthropic-messages", baseUrl: "https://api.anthropic.com", provider: "anthropic", reasoning: true, input: ["text", "image"], cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75, }, contextWindow: 1000000, maxTokens: 64000, }); } // Add missing Gemini 3.1 Flash Lite Preview until models.dev includes it. if (!allModels.some((m) => m.provider === "google" && m.id === "gemini-3.1-flash-lite-preview")) { allModels.push({ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview", api: "google-generative-ai", baseUrl: "https://generativelanguage.googleapis.com/v1beta", provider: "google", reasoning: true, input: ["text", "image"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, }, contextWindow: 1048576, maxTokens: 65536, }); } // Add missing gpt models if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) { allModels.push({ id: "gpt-5-chat-latest", name: "GPT-5 Chat Latest", api: "openai-responses", baseUrl: "https://api.openai.com/v1", provider: "openai", reasoning: false, input: ["text", "image"], cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 128000, maxTokens: 16384, }); } if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) { allModels.push({ id: "gpt-5.1-codex", name: "GPT-5.1 Codex", api: "openai-responses", baseUrl: "https://api.openai.com/v1", provider: "openai", reasoning: true, input: ["text", "image"], cost: { input: 1.25, output: 5, cacheRead: 0.125, cacheWrite: 1.25, }, contextWindow: 400000, maxTokens: 128000, }); } if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex-max")) { allModels.push({ id: "gpt-5.1-codex-max", name: "GPT-5.1 Codex Max", api: "openai-responses", baseUrl: "https://api.openai.com/v1", provider: "openai", reasoning: true, input: ["text", "image"], cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, maxTokens: 128000, }); } if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.3-codex-spark")) { allModels.push({ id: "gpt-5.3-codex-spark", name: "GPT-5.3 Codex Spark", api: "openai-responses", baseUrl: "https://api.openai.com/v1", provider: "openai", reasoning: true, input: ["text"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, }, contextWindow: 128000, maxTokens: 16384, }); } // Add missing GitHub Copilot GPT-5.3 models until models.dev includes them. const copilotBaseModel = allModels.find( (m) => m.provider === "github-copilot" && m.id === "gpt-5.2-codex", ); if (copilotBaseModel) { if (!allModels.some((m) => m.provider === "github-copilot" && m.id === "gpt-5.3-codex")) { allModels.push({ ...copilotBaseModel, id: "gpt-5.3-codex", name: "GPT-5.3 Codex", }); } } if (!allModels.some((m) => m.provider === "openai" && m.id === "gpt-5.4")) { allModels.push({ id: "gpt-5.4", name: "GPT-5.4", api: "openai-responses", baseUrl: "https://api.openai.com/v1", provider: "openai", reasoning: true, input: ["text", "image"], cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0, }, contextWindow: 272000, maxTokens: 128000, }); } const deepseekCompat: OpenAICompletionsCompat = { requiresReasoningContentOnAssistantMessages: true, thinkingFormat: "deepseek", }; const deepseekV4Models: Model<"openai-completions">[] = [ { id: "deepseek-v4-flash", name: "DeepSeek V4 Flash", api: "openai-completions", baseUrl: "https://api.deepseek.com", provider: "deepseek", reasoning: true, input: ["text"], cost: { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0, }, contextWindow: 1000000, maxTokens: 384000, compat: deepseekCompat, }, { id: "deepseek-v4-pro", name: "DeepSeek V4 Pro", api: "openai-completions", baseUrl: "https://api.deepseek.com", provider: "deepseek", reasoning: true, input: ["text"], cost: { input: 0.435, output: 0.87, cacheRead: 0.003625, cacheWrite: 0, }, contextWindow: 1000000, maxTokens: 384000, compat: deepseekCompat, }, ]; allModels.push(...deepseekV4Models); for (const candidate of allModels) { if (candidate.api === "openai-completions" && candidate.id.includes("deepseek-v4")) { candidate.compat = { ...candidate.compat, ...(candidate.provider === "openrouter" ? { requiresReasoningContentOnAssistantMessages: deepseekCompat.requiresReasoningContentOnAssistantMessages, thinkingFormat: deepseekCompat.thinkingFormat, } : deepseekCompat), }; mergeThinkingLevelMap(candidate, DEEPSEEK_V4_THINKING_LEVEL_MAP); } } const minimaxDirectSupportedIds = new Set(["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]); for (const candidate of allModels) { if ( (candidate.provider === "minimax" || candidate.provider === "minimax-cn") && minimaxDirectSupportedIds.has(candidate.id) ) { candidate.contextWindow = 204800; candidate.maxTokens = 131072; } } for (let i = allModels.length - 1; i >= 0; i--) { const candidate = allModels[i]; if ( (candidate.provider === "minimax" || candidate.provider === "minimax-cn") && !minimaxDirectSupportedIds.has(candidate.id) ) { allModels.splice(i, 1); } } // OpenAI Codex (ChatGPT OAuth) models // NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases. // Context window is based on observed server limits (400s above ~272k), not marketing numbers. const CODEX_BASE_URL = "https://chatgpt.com/backend-api"; const CODEX_CONTEXT = 272000; const CODEX_MAX_TOKENS = 128000; const codexModels: Model<"openai-codex-responses">[] = [ { id: "gpt-5.2", name: "GPT-5.2", api: "openai-codex-responses", provider: "openai-codex", baseUrl: CODEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, contextWindow: CODEX_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, { id: "gpt-5.3-codex", name: "GPT-5.3 Codex", api: "openai-codex-responses", provider: "openai-codex", baseUrl: CODEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, contextWindow: CODEX_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, { id: "gpt-5.3-codex-spark", name: "GPT-5.3 Codex Spark", api: "openai-codex-responses", provider: "openai-codex", baseUrl: CODEX_BASE_URL, reasoning: true, input: ["text"], cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, contextWindow: CODEX_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, { id: "gpt-5.4", name: "GPT-5.4", api: "openai-codex-responses", provider: "openai-codex", baseUrl: CODEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 }, contextWindow: CODEX_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, { id: "gpt-5.4-mini", name: "GPT-5.4 mini", api: "openai-codex-responses", provider: "openai-codex", baseUrl: CODEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 }, contextWindow: CODEX_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, { id: "gpt-5.5", name: "GPT-5.5", api: "openai-codex-responses", provider: "openai-codex", baseUrl: CODEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 }, contextWindow: CODEX_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, ]; allModels.push(...codexModels); // Add missing Grok models const missingGrokModels: Model<"openai-completions">[] = [ { id: "grok-3", name: "Grok 3", api: "openai-completions", baseUrl: "https://api.x.ai/v1", provider: "xai", reasoning: false, input: ["text"], cost: { input: 3, output: 15, cacheRead: 0.75, cacheWrite: 0 }, contextWindow: 131072, maxTokens: 8192, }, { id: "grok-3-fast", name: "Grok 3 Fast", api: "openai-completions", baseUrl: "https://api.x.ai/v1", provider: "xai", reasoning: false, input: ["text"], cost: { input: 5, output: 25, cacheRead: 1.25, cacheWrite: 0 }, contextWindow: 131072, maxTokens: 8192, }, { id: "grok-code-fast-1", name: "Grok Code Fast 1", api: "openai-completions", baseUrl: "https://api.x.ai/v1", provider: "xai", reasoning: false, input: ["text"], cost: { input: 0.2, output: 1.5, cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 32768, maxTokens: 8192, }, ]; for (const model of missingGrokModels) { if (!allModels.some(m => m.provider === model.provider && m.id === model.id)) { allModels.push(model); } } // Add missing Mistral Medium 3.5 model until models.dev includes it if (!allModels.some(m => m.provider === "mistral" && m.id === "mistral-medium-3.5")) { allModels.push({ id: "mistral-medium-3.5", name: "Mistral Medium 3.5", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", reasoning: true, input: ["text", "image"], cost: { input: 1.5, output: 7.5, cacheRead: 0, cacheWrite: 0, }, contextWindow: 262144, // 256k tokens maxTokens: 262144, }); } // Add "auto" alias for openrouter/auto if (!allModels.some(m => m.provider === "openrouter" && m.id === "auto")) { allModels.push({ id: "auto", name: "Auto", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text", "image"], cost: { // we dont know about the costs because OpenRouter auto routes to different models // and then charges you for the underlying used model input:0, output:0, cacheRead:0, cacheWrite:0, }, contextWindow: 2000000, maxTokens: 30000, }); } const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com"; const vertexModels: Model<"google-vertex">[] = [ { id: "gemini-3-pro-preview", name: "Gemini 3 Pro Preview (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, contextWindow: 1000000, maxTokens: 64000, }, { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-3.1-pro-preview-customtools", name: "Gemini 3.1 Pro Preview Custom Tools (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-2.0-flash", name: "Gemini 2.0 Flash (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: false, input: ["text", "image"], cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 8192, }, { id: "gemini-2.0-flash-lite", name: "Gemini 2.0 Flash Lite (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-2.5-flash-lite-preview-09-2025", name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: true, input: ["text", "image"], cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, contextWindow: 1048576, maxTokens: 65536, }, { id: "gemini-1.5-pro", name: "Gemini 1.5 Pro (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: false, input: ["text", "image"], cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 }, contextWindow: 1000000, maxTokens: 8192, }, { id: "gemini-1.5-flash", name: "Gemini 1.5 Flash (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: false, input: ["text", "image"], cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, contextWindow: 1000000, maxTokens: 8192, }, { id: "gemini-1.5-flash-8b", name: "Gemini 1.5 Flash-8B (Vertex)", api: "google-vertex", provider: "google-vertex", baseUrl: VERTEX_BASE_URL, reasoning: false, input: ["text", "image"], cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 }, contextWindow: 1000000, maxTokens: 8192, }, ]; allModels.push(...vertexModels); const azureOpenAiModels: Model[] = allModels .filter((model) => model.provider === "openai" && model.api === "openai-responses") .map((model) => ({ ...model, api: "azure-openai-responses", provider: "azure-openai-responses", baseUrl: "", })); allModels.push(...azureOpenAiModels); for (const model of allModels) { applyThinkingLevelMetadata(model); } // Group by provider and deduplicate by model ID const providers: Record>> = {}; for (const model of allModels) { if (!providers[model.provider]) { providers[model.provider] = {}; } // Use model ID as key to automatically deduplicate // Only add if not already present (models.dev takes priority over OpenRouter) if (!providers[model.provider][model.id]) { providers[model.provider][model.id] = model; } } // Generate TypeScript file let output = `// This file is auto-generated by scripts/generate-models.ts // Do not edit manually - run 'npm run generate-models' to update import type { Model } from "./types.ts"; export const MODELS = { `; // Generate provider sections (sorted for deterministic output) const sortedProviderIds = Object.keys(providers).sort(); for (const providerId of sortedProviderIds) { const models = providers[providerId]; output += `\t${JSON.stringify(providerId)}: {\n`; const sortedModelIds = Object.keys(models).sort(); for (const modelId of sortedModelIds) { const model = models[modelId]; output += `\t\t"${model.id}": {\n`; output += `\t\t\tid: "${model.id}",\n`; output += `\t\t\tname: "${model.name}",\n`; output += `\t\t\tapi: "${model.api}",\n`; output += `\t\t\tprovider: "${model.provider}",\n`; if (model.baseUrl !== undefined) { output += `\t\t\tbaseUrl: "${model.baseUrl}",\n`; } if (model.headers) { output += `\t\t\theaders: ${JSON.stringify(model.headers)},\n`; } if (model.compat) { output += ` compat: ${JSON.stringify(model.compat)}, `; } output += `\t\t\treasoning: ${model.reasoning},\n`; if (model.thinkingLevelMap) { output += `\t\t\tthinkingLevelMap: ${JSON.stringify(model.thinkingLevelMap)},\n`; } output += `\t\t\tinput: [${model.input.map(i => `"${i}"`).join(", ")}],\n`; output += `\t\t\tcost: {\n`; output += `\t\t\t\tinput: ${model.cost.input},\n`; output += `\t\t\t\toutput: ${model.cost.output},\n`; output += `\t\t\t\tcacheRead: ${model.cost.cacheRead},\n`; output += `\t\t\t\tcacheWrite: ${model.cost.cacheWrite},\n`; output += `\t\t\t},\n`; output += `\t\t\tcontextWindow: ${model.contextWindow},\n`; output += `\t\t\tmaxTokens: ${model.maxTokens},\n`; output += `\t\t} satisfies Model<"${model.api}">,\n`; } output += `\t},\n`; } output += `} as const; `; // Write file writeFileSync(join(packageRoot, "src/models.generated.ts"), output); console.log("Generated src/models.generated.ts"); // Print statistics const totalModels = allModels.length; const reasoningModels = allModels.filter(m => m.reasoning).length; console.log(`\nModel Statistics:`); console.log(` Total tool-capable models: ${totalModels}`); console.log(` Reasoning-capable models: ${reasoningModels}`); for (const [provider, models] of Object.entries(providers)) { console.log(` ${provider}: ${Object.keys(models).length} models`); } } // Run the generator generateModels().catch(console.error);