mirror of
https://github.com/badlogic/pi-mono.git
synced 2026-05-24 22:15:33 +00:00
1935 lines
58 KiB
TypeScript
1935 lines
58 KiB
TypeScript
#!/usr/bin/env tsx
|
|
|
|
import { writeFileSync } from "fs";
|
|
import { join, dirname } from "path";
|
|
import { fileURLToPath } from "url";
|
|
import {
|
|
CLOUDFLARE_AI_GATEWAY_ANTHROPIC_BASE_URL,
|
|
CLOUDFLARE_AI_GATEWAY_COMPAT_BASE_URL,
|
|
CLOUDFLARE_AI_GATEWAY_OPENAI_BASE_URL,
|
|
CLOUDFLARE_WORKERS_AI_BASE_URL,
|
|
} from "../src/providers/cloudflare.js";
|
|
import {
|
|
Api,
|
|
type AnthropicMessagesCompat,
|
|
KnownProvider,
|
|
Model,
|
|
type OpenAICompletionsCompat,
|
|
} from "../src/types.js";
|
|
|
|
// ES modules have no built-in __filename/__dirname, so both are derived from
// this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Directory one level above the folder that contains this script.
const packageRoot = join(__dirname, "..");
|
|
|
|
/**
 * One model entry of the models.dev catalogue (https://models.dev/api.json).
 * Only the fields this script reads are declared; everything except `id` and
 * `name` may be absent.
 */
interface ModelsDevModel {
	/** Model identifier. */
	id: string;
	/** Display name; importers fall back to the model id when it is empty. */
	name: string;
	/** Tool-calling support. Importers skip entries where this is not exactly `true`. */
	tool_call?: boolean;
	/** Whether the model is a reasoning model; anything but `true` is treated as `false`. */
	reasoning?: boolean;
	/** Token limits: context window size and maximum output tokens. */
	limit?: { context?: number; output?: number };
	/**
	 * Pricing, copied unchanged into the generated model's `cost`
	 * (missing values become 0). NOTE(review): units are not visible here —
	 * presumably USD per million tokens; confirm against models.dev.
	 */
	cost?: { input?: number; output?: number; cache_read?: number; cache_write?: number };
	/** Supported modalities; an `input` list containing "image" enables image input. */
	modalities?: { input?: string[] };
	/** Upstream SDK information; `npm` is the AI SDK package name used to pick the API flavour. */
	provider?: { npm?: string };
}
|
|
|
|
/** A price as delivered by the gateway: either a number or a numeric string. */
type AiGatewayPrice = string | number;

/**
 * One model entry of the Vercel AI Gateway model listing. Only the fields this
 * script reads are declared.
 */
interface AiGatewayModel {
	/** Model identifier. */
	id: string;
	/** Display name; the id is used when it is missing. */
	name?: string;
	/** Context window size in tokens. */
	context_window?: number;
	/** Maximum number of output tokens. */
	max_tokens?: number;
	/** Capability tags; this script looks for "tool-use", "vision" and "reasoning". */
	tags?: string[];
	/** Prices; the importer multiplies each value by 1,000,000 before storing it. */
	pricing?: {
		input?: AiGatewayPrice;
		output?: AiGatewayPrice;
		input_cache_read?: AiGatewayPrice;
		input_cache_write?: AiGatewayPrice;
	};
}
|
|
|
|
// Fixed HTTP headers copied onto every generated GitHub Copilot model entry.
// They present the client as the VS Code Copilot Chat extension.
const COPILOT_STATIC_HEADERS = {
	"User-Agent": "GitHubCopilotChat/0.35.0",
	"Editor-Version": "vscode/1.107.0",
	"Editor-Plugin-Version": "copilot-chat/0.35.0",
	"Copilot-Integration-Id": "vscode-chat",
} as const;
|
|
|
|
// Fixed HTTP headers for Kimi models.
// NOTE(review): the consumer is outside this excerpt — presumably the
// "Kimi For Coding" import further down; confirm.
const KIMI_STATIC_HEADERS = {
	"User-Agent": "KimiCLI/1.5",
} as const;
|
|
|
|
// Endpoint used as baseUrl for every generated Together AI model.
const TOGETHER_BASE_URL = "https://api.together.ai/v1";

// Compat preset shared by all Together models; the presets below layer
// reasoning-related overrides on top of it.
const TOGETHER_BASE_COMPAT: OpenAICompletionsCompat = {
	supportsStore: false,
	supportsDeveloperRole: false,
	supportsReasoningEffort: false,
	maxTokensField: "max_tokens",
	supportsStrictMode: false,
	supportsLongCacheRetention: false,
};

// Base preset plus the "together" thinking format.
const TOGETHER_TOGGLE_REASONING_COMPAT: OpenAICompletionsCompat = {
	...TOGETHER_BASE_COMPAT,
	thinkingFormat: "together",
};

// Base preset with reasoning effort enabled and the "openai" thinking format.
const TOGETHER_REASONING_EFFORT_COMPAT: OpenAICompletionsCompat = {
	...TOGETHER_BASE_COMPAT,
	supportsReasoningEffort: true,
	thinkingFormat: "openai",
};

// Toggle preset ("together" thinking format) with reasoning effort enabled as well.
const TOGETHER_TOGGLE_REASONING_EFFORT_COMPAT: OpenAICompletionsCompat = {
	...TOGETHER_TOGGLE_REASONING_COMPAT,
	supportsReasoningEffort: true,
};
|
|
// Together reasoning models that keep the base compat preset (no thinking
// format) and receive the fixed reasoning level map.
const TOGETHER_REASONING_ONLY_MODELS = new Set<string>([
	"deepseek-ai/DeepSeek-R1",
	"MiniMaxAI/MiniMax-M2.5",
	"MiniMaxAI/MiniMax-M2.7",
]);

// Together models configured with the reasoning-effort preset ("openai" thinking format).
const TOGETHER_REASONING_EFFORT_MODELS = new Set<string>([
	"openai/gpt-oss-20b",
	"openai/gpt-oss-120b",
]);

// Together models configured with both the "together" thinking format and reasoning effort.
const TOGETHER_TOGGLE_REASONING_EFFORT_MODELS = new Set<string>([
	"deepseek-ai/DeepSeek-V4-Pro",
]);
|
|
// Thinking-level maps for Together models. A `null` entry marks a level as
// unsupported; a string is presumably the provider-specific value sent for
// that level (confirm in the openai-completions provider).

// Used for the models in TOGETHER_REASONING_ONLY_MODELS.
const TOGETHER_FIXED_REASONING_LEVEL_MAP = {
	off: null,
	minimal: null,
	low: null,
	medium: null,
} as const;

// Used for the models in TOGETHER_REASONING_EFFORT_MODELS.
const TOGETHER_REASONING_EFFORT_LEVEL_MAP = {
	off: null,
	minimal: null,
} as const;

// Used for the models in TOGETHER_TOGGLE_REASONING_EFFORT_MODELS.
const TOGETHER_DEEPSEEK_V4_THINKING_LEVEL_MAP = {
	minimal: null,
	low: null,
	medium: null,
	high: "high",
	xhigh: null,
} as const;

// Default for every other Together reasoning model.
const TOGETHER_TOGGLE_REASONING_LEVEL_MAP = {
	minimal: null,
	low: null,
	medium: null,
} as const;
|
|
|
|
// Vercel AI Gateway: root URL stored as baseUrl on generated models, and the
// versioned URL under which the model listing ("/models") is fetched.
const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh";
const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1";

// Z.ai model ids that do not get `zaiToolStream: true` in their compat settings.
const ZAI_TOOL_STREAM_UNSUPPORTED_MODELS = new Set<string>([
	"glm-4.5",
	"glm-4.5-air",
	"glm-4.5-flash",
	"glm-4.5v",
]);

// "<provider>:<modelId>" keys of Anthropic-API models that are generated with
// `supportsEagerToolInputStreaming: false`.
const EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS = new Set<string>([
	"github-copilot:claude-haiku-4.5",
	"github-copilot:claude-sonnet-4",
	"github-copilot:claude-sonnet-4.5",
]);
|
|
|
|
// Thinking-level map merged into openai-completions models whose id contains
// "deepseek-v4". `null` marks a level as unsupported; "xhigh" is mapped to
// the value "max".
const DEEPSEEK_V4_THINKING_LEVEL_MAP = {
	minimal: null,
	low: null,
	medium: null,
	high: "high",
	xhigh: "max",
} as const;
|
|
|
|
// OpenAI Responses API model ids whose "off" thinking level is mapped to the
// value "none" (see applyThinkingLevelMetadata).
const OPENAI_RESPONSES_NONE_REASONING_MODELS = new Set<string>([
	"gpt-5.1",
	"gpt-5.2",
	"gpt-5.3-codex",
	"gpt-5.4",
	"gpt-5.4-mini",
	"gpt-5.4-nano",
	"gpt-5.5",
]);
|
|
|
|
/**
 * Overlays `map` onto the model's existing thinking-level map, in place.
 * Entries in `map` win over entries already present; a model without a map
 * ends up with a copy of `map`.
 */
function mergeThinkingLevelMap(model: Model<any>, map: NonNullable<Model<any>["thinkingLevelMap"]>): void {
	const existing = model.thinkingLevelMap;
	model.thinkingLevelMap = Object.assign({}, existing, map);
}
|
|
|
|
/**
 * Picks the compat preset for a Together AI model.
 *
 * @param modelId Together model id.
 * @param reasoning Whether the catalogue flags the model as a reasoning model.
 * @returns The shared preset object (not a copy) matching the model.
 */
function getTogetherCompat(modelId: string, reasoning: boolean): OpenAICompletionsCompat {
	if (reasoning) {
		if (TOGETHER_REASONING_EFFORT_MODELS.has(modelId)) {
			return TOGETHER_REASONING_EFFORT_COMPAT;
		}
		if (TOGETHER_TOGGLE_REASONING_EFFORT_MODELS.has(modelId)) {
			return TOGETHER_TOGGLE_REASONING_EFFORT_COMPAT;
		}
		// Any reasoning model not listed as reasoning-only gets the toggle preset.
		if (!TOGETHER_REASONING_ONLY_MODELS.has(modelId)) {
			return TOGETHER_TOGGLE_REASONING_COMPAT;
		}
	}
	// Non-reasoning models and reasoning-only models share the base preset.
	return TOGETHER_BASE_COMPAT;
}
|
|
|
|
/**
 * Builds the thinking-level map for a Together AI model.
 *
 * @param modelId Together model id.
 * @param reasoning Whether the catalogue flags the model as a reasoning model.
 * @returns A fresh copy of the matching level map, or `undefined` for
 *   non-reasoning models.
 */
function getTogetherThinkingLevelMap(
	modelId: string,
	reasoning: boolean,
): NonNullable<Model<any>["thinkingLevelMap"]> | undefined {
	if (!reasoning) {
		return undefined;
	}

	// Same precedence as the compat selection: effort, toggle+effort, reasoning-only, default.
	const template = TOGETHER_REASONING_EFFORT_MODELS.has(modelId)
		? TOGETHER_REASONING_EFFORT_LEVEL_MAP
		: TOGETHER_TOGGLE_REASONING_EFFORT_MODELS.has(modelId)
			? TOGETHER_DEEPSEEK_V4_THINKING_LEVEL_MAP
			: TOGETHER_REASONING_ONLY_MODELS.has(modelId)
				? TOGETHER_FIXED_REASONING_LEVEL_MAP
				: TOGETHER_TOGGLE_REASONING_LEVEL_MAP;

	// Copy so that callers never share (or mutate) the module-level constants.
	return { ...template };
}
|
|
|
|
/**
 * Reports whether a model id belongs to a GPT family that gets the "xhigh"
 * thinking level. Matching is by substring, so prefixed or suffixed ids
 * (e.g. "openai/gpt-5.4-mini") match as well.
 */
function supportsOpenAiXhigh(modelId: string): boolean {
	const xhighFamilies = ["gpt-5.2", "gpt-5.3", "gpt-5.4", "gpt-5.5"];
	return xhighFamilies.some((family) => modelId.includes(family));
}
|
|
|
|
/** True when the model uses one of the two Google APIs ("google-generative-ai" or "google-vertex"). */
function isGoogleThinkingApi(model: Model<any>): boolean {
	switch (model.api) {
		case "google-generative-ai":
		case "google-vertex":
			return true;
		default:
			return false;
	}
}
|
|
|
|
/**
 * Case-insensitive check for Gemini 3 Pro ids: "gemini-3", an optional
 * ".<digits>" minor version, then "-pro", anywhere in the id.
 */
function isGemini3ProModel(modelId: string): boolean {
	const gemini3Pro = /gemini-3(?:\.\d+)?-pro/;
	const normalizedId = modelId.toLowerCase();
	return gemini3Pro.test(normalizedId);
}
|
|
|
|
/**
 * Case-insensitive check for Gemini 3 Flash ids: "gemini-3", an optional
 * ".<digits>" minor version, then "-flash", anywhere in the id.
 */
function isGemini3FlashModel(modelId: string): boolean {
	const gemini3Flash = /gemini-3(?:\.\d+)?-flash/;
	const normalizedId = modelId.toLowerCase();
	return gemini3Flash.test(normalizedId);
}
|
|
|
|
/** Case-insensitive check for ids containing "gemma4" or "gemma-4". */
function isGemma4Model(modelId: string): boolean {
	const gemma4 = /gemma-?4/;
	const normalizedId = modelId.toLowerCase();
	return gemma4.test(normalizedId);
}
|
|
|
|
/**
 * Applies the model-specific thinking-level overrides to `model`, in place.
 *
 * Each matching rule is merged into `model.thinkingLevelMap`. The rules run in
 * a fixed order and later merges override earlier ones for the same level, so
 * the order of the statements below is significant. In the maps, `null` marks
 * a level as unsupported.
 */
function applyThinkingLevelMetadata(model: Model<any>): void {
	const { id, api, provider } = model;
	const isGpt5 = id.startsWith("gpt-5");
	const idContainsAny = (...fragments: string[]): boolean => fragments.some((fragment) => id.includes(fragment));

	// GPT-5 on the (Azure) OpenAI Responses API: "off" is unsupported by default...
	const usesResponsesApi = api === "openai-responses" || api === "azure-openai-responses";
	if (usesResponsesApi && isGpt5) {
		mergeThinkingLevelMap(model, { off: null });
	}

	if (provider === "github-copilot" && isGpt5) {
		mergeThinkingLevelMap(model, { minimal: "low" });
	}

	// ...except for the listed OpenAI models, where "off" maps to "none".
	if (api === "openai-responses" && provider === "openai" && OPENAI_RESPONSES_NONE_REASONING_MODELS.has(id)) {
		mergeThinkingLevelMap(model, { off: "none" });
	}

	if (supportsOpenAiXhigh(id)) {
		mergeThinkingLevelMap(model, { xhigh: "xhigh" });
	}

	// Opus 4.6 maps "xhigh" to "max"; Opus 4.7 maps it to "xhigh".
	if (idContainsAny("opus-4-6", "opus-4.6")) {
		mergeThinkingLevelMap(model, { xhigh: "max" });
	}
	if (idContainsAny("opus-4-7", "opus-4.7")) {
		mergeThinkingLevelMap(model, { xhigh: "xhigh" });
	}

	if (api === "openai-completions" && id.includes("deepseek-v4")) {
		mergeThinkingLevelMap(model, DEEPSEEK_V4_THINKING_LEVEL_MAP);
	}

	// Google APIs: per-family level support for Gemini 3 Pro / Flash and Gemma 4.
	if (isGoogleThinkingApi(model)) {
		if (isGemini3ProModel(id)) {
			mergeThinkingLevelMap(model, { off: null, minimal: null, low: "LOW", medium: null, high: "HIGH" });
		}
		if (isGemini3FlashModel(id)) {
			mergeThinkingLevelMap(model, { off: null });
		}
		if (isGemma4Model(id)) {
			mergeThinkingLevelMap(model, { off: null, minimal: "MINIMAL", low: null, medium: null, high: "HIGH" });
		}
	}

	if (provider === "groq" && id === "qwen/qwen3-32b") {
		mergeThinkingLevelMap(model, { minimal: null, low: null, medium: null, high: "default" });
	}

	if (provider === "openai-codex" && supportsOpenAiXhigh(id)) {
		mergeThinkingLevelMap(model, { minimal: "low" });
	}

	if (provider === "openrouter" && id.startsWith("inception/mercury-2")) {
		// Mercury 2 in instant mode (reasoning_effort: "none") disables tool calling.
		// Mark "off" unsupported so the openai-completions provider omits the reasoning param
		// instead of defaulting to {reasoning:{effort:"none"}} (see openai-completions.ts:575).
		// Pi's low/medium/high pass through verbatim; OpenRouter normalizes to Mercury's vocabulary.
		mergeThinkingLevelMap(model, { off: null });
	}
}
|
|
|
|
/**
 * Returns the Anthropic Messages compat overrides for a provider/model pair.
 *
 * @returns `{ supportsEagerToolInputStreaming: false }` when the
 *   "<provider>:<modelId>" key is listed as unsupported, otherwise `undefined`.
 */
function getAnthropicMessagesCompat(provider: string, modelId: string): AnthropicMessagesCompat | undefined {
	const lookupKey = `${provider}:${modelId}`;
	if (EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(lookupKey)) {
		return { supportsEagerToolInputStreaming: false };
	}
	return undefined;
}
|
|
|
|
/**
 * Chooses the Bedrock runtime endpoint for a model id: ids starting with
 * "eu." use eu-central-1, every other id uses us-east-1.
 */
function getBedrockBaseUrl(modelId: string): string {
	if (modelId.startsWith("eu.")) {
		return "https://bedrock-runtime.eu-central-1.amazonaws.com";
	}
	return "https://bedrock-runtime.us-east-1.amazonaws.com";
}
|
|
|
|
/**
 * Downloads the OpenRouter model catalogue and converts every tool-capable
 * entry into a `Model` record for the "openrouter" provider.
 *
 * Best effort: any failure (network error, non-2xx HTTP status, unexpected
 * payload) is logged and results in an empty list instead of a rejection.
 *
 * @returns The converted models, or `[]` when the catalogue could not be loaded.
 */
async function fetchOpenRouterModels(): Promise<Model<any>[]> {
	try {
		console.log("Fetching models from OpenRouter API...");
		const response = await fetch("https://openrouter.ai/api/v1/models");
		// fetch() only rejects on network failures; HTTP error statuses must be checked
		// explicitly, otherwise an error body would be parsed as if it were the catalogue.
		if (!response.ok) {
			throw new Error(`OpenRouter API responded with HTTP ${response.status} ${response.statusText}`);
		}

		const data = await response.json();
		if (!Array.isArray(data?.data)) {
			throw new Error("OpenRouter API response does not contain a `data` array");
		}

		const provider: KnownProvider = "openrouter";
		// OpenRouter reports prices in $/token; models store $/million tokens.
		const toPerMillion = (perToken: string | undefined): number => parseFloat(perToken || "0") * 1_000_000;

		const models: Model<any>[] = [];

		for (const model of data.data) {
			// Only include models that support tools
			if (!model.supported_parameters?.includes("tools")) continue;

			// Parse input modalities
			const input: ("text" | "image")[] = ["text"];
			if (model.architecture?.modality?.includes("image")) {
				input.push("image");
			}

			models.push({
				// The full OpenRouter id (including the vendor prefix) is kept as the model id.
				id: model.id,
				name: model.name,
				api: "openai-completions",
				baseUrl: "https://openrouter.ai/api/v1",
				provider,
				reasoning: model.supported_parameters?.includes("reasoning") || false,
				input,
				cost: {
					input: toPerMillion(model.pricing?.prompt),
					output: toPerMillion(model.pricing?.completion),
					cacheRead: toPerMillion(model.pricing?.input_cache_read),
					cacheWrite: toPerMillion(model.pricing?.input_cache_write),
				},
				contextWindow: model.context_length || 4096,
				maxTokens: model.top_provider?.max_completion_tokens || 4096,
			});
		}

		console.log(`Fetched ${models.length} tool-capable models from OpenRouter`);
		return models;
	} catch (error) {
		console.error("Failed to fetch OpenRouter models:", error);
		return [];
	}
}
|
|
|
|
/**
 * Downloads the Vercel AI Gateway model listing and converts every entry
 * tagged "tool-use" into a `Model` record for the "vercel-ai-gateway" provider.
 *
 * Best effort: any failure (network error, non-2xx HTTP status, invalid JSON)
 * is logged and results in an empty list instead of a rejection.
 *
 * @returns The converted models, or `[]` when the listing could not be loaded.
 */
async function fetchAiGatewayModels(): Promise<Model<any>[]> {
	try {
		console.log("Fetching models from Vercel AI Gateway API...");
		const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`);
		// fetch() only rejects on network failures. Without this check an HTTP error
		// with a JSON body would be silently reported as "Fetched 0 ... models".
		if (!response.ok) {
			throw new Error(`Vercel AI Gateway API responded with HTTP ${response.status} ${response.statusText}`);
		}
		const data = await response.json();
		const models: Model<any>[] = [];

		// Prices arrive as numbers or numeric strings; anything non-finite counts as 0.
		const toNumber = (value: string | number | undefined): number => {
			const parsed = typeof value === "number" ? value : parseFloat(value ?? "0");
			return Number.isFinite(parsed) ? parsed : 0;
		};

		const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : [];
		for (const model of items) {
			const tags = Array.isArray(model.tags) ? model.tags : [];
			// Only include models that support tools
			if (!tags.includes("tool-use")) continue;

			const input: ("text" | "image")[] = ["text"];
			if (tags.includes("vision")) {
				input.push("image");
			}

			// Stored costs are the listed prices multiplied by 1,000,000.
			const pricing = model.pricing;
			models.push({
				id: model.id,
				name: model.name || model.id,
				api: "anthropic-messages",
				baseUrl: AI_GATEWAY_BASE_URL,
				provider: "vercel-ai-gateway",
				reasoning: tags.includes("reasoning"),
				input,
				cost: {
					input: toNumber(pricing?.input) * 1_000_000,
					output: toNumber(pricing?.output) * 1_000_000,
					cacheRead: toNumber(pricing?.input_cache_read) * 1_000_000,
					cacheWrite: toNumber(pricing?.input_cache_write) * 1_000_000,
				},
				contextWindow: model.context_window || 4096,
				maxTokens: model.max_tokens || 4096,
			});
		}

		console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`);
		return models;
	} catch (error) {
		console.error("Failed to fetch Vercel AI Gateway models:", error);
		return [];
	}
}
|
|
|
|
async function loadModelsDevData(): Promise<Model<any>[]> {
|
|
try {
|
|
console.log("Fetching models from models.dev API...");
|
|
const response = await fetch("https://models.dev/api.json");
|
|
const data = await response.json();
|
|
|
|
const models: Model<any>[] = [];
|
|
|
|
// Process Amazon Bedrock models
|
|
if (data["amazon-bedrock"]?.models) {
|
|
for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
let id = modelId;
|
|
|
|
if (id.startsWith("ai21.jamba")) {
|
|
// These models don't support tool use in streaming mode
|
|
continue;
|
|
}
|
|
|
|
if (id.startsWith("mistral.mistral-7b-instruct-v0")) {
|
|
// These models don't support system messages
|
|
continue;
|
|
}
|
|
|
|
models.push({
|
|
id,
|
|
name: m.name || id,
|
|
api: "bedrock-converse-stream" as const,
|
|
provider: "amazon-bedrock" as const,
|
|
baseUrl: getBedrockBaseUrl(id),
|
|
reasoning: m.reasoning === true,
|
|
input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Anthropic models
|
|
if (data.anthropic?.models) {
|
|
for (const [modelId, model] of Object.entries(data.anthropic.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "anthropic-messages",
|
|
provider: "anthropic",
|
|
baseUrl: "https://api.anthropic.com",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Google models
|
|
if (data.google?.models) {
|
|
for (const [modelId, model] of Object.entries(data.google.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "google-generative-ai",
|
|
provider: "google",
|
|
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process OpenAI models
|
|
if (data.openai?.models) {
|
|
for (const [modelId, model] of Object.entries(data.openai.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-responses",
|
|
provider: "openai",
|
|
baseUrl: "https://api.openai.com/v1",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Groq models
|
|
if (data.groq?.models) {
|
|
for (const [modelId, model] of Object.entries(data.groq.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider: "groq",
|
|
baseUrl: "https://api.groq.com/openai/v1",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Cerebras models
|
|
if (data.cerebras?.models) {
|
|
for (const [modelId, model] of Object.entries(data.cerebras.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider: "cerebras",
|
|
baseUrl: "https://api.cerebras.ai/v1",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Cloudflare Workers AI models
|
|
if (data["cloudflare-workers-ai"]?.models) {
|
|
for (const [modelId, model] of Object.entries(data["cloudflare-workers-ai"].models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider: "cloudflare-workers-ai",
|
|
baseUrl: CLOUDFLARE_WORKERS_AI_BASE_URL,
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
compat: { sendSessionAffinityHeaders: true },
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Cloudflare AI Gateway models
|
|
if (data["cloudflare-ai-gateway"]?.models) {
|
|
for (const [prefixedId, model] of Object.entries(data["cloudflare-ai-gateway"].models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
const slashIdx = prefixedId.indexOf("/");
|
|
if (slashIdx === -1) continue;
|
|
const upstream = prefixedId.slice(0, slashIdx);
|
|
const nativeId = prefixedId.slice(slashIdx + 1);
|
|
|
|
let api: "anthropic-messages" | "openai-completions" | "openai-responses";
|
|
let baseUrl: string;
|
|
let id: string;
|
|
if (upstream === "openai") {
|
|
api = "openai-responses";
|
|
baseUrl = CLOUDFLARE_AI_GATEWAY_OPENAI_BASE_URL;
|
|
id = nativeId;
|
|
} else if (upstream === "anthropic") {
|
|
api = "anthropic-messages";
|
|
baseUrl = CLOUDFLARE_AI_GATEWAY_ANTHROPIC_BASE_URL;
|
|
id = nativeId;
|
|
} else if (upstream === "workers-ai") {
|
|
api = "openai-completions";
|
|
baseUrl = CLOUDFLARE_AI_GATEWAY_COMPAT_BASE_URL;
|
|
id = prefixedId;
|
|
} else {
|
|
continue;
|
|
}
|
|
|
|
// workers-ai/* through the gateway forwards x-session-affinity to
|
|
// the underlying Workers AI runtime for prefix-cache routing.
|
|
const compat = upstream === "workers-ai" ? { sendSessionAffinityHeaders: true } : undefined;
|
|
|
|
models.push({
|
|
id,
|
|
name: m.name || id,
|
|
api,
|
|
provider: "cloudflare-ai-gateway",
|
|
baseUrl,
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
...(compat ? { compat } : {}),
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process xAi models
|
|
if (data.xai?.models) {
|
|
for (const [modelId, model] of Object.entries(data.xai.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider: "xai",
|
|
baseUrl: "https://api.x.ai/v1",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process zAi models
|
|
if (data["zai-coding-plan"]?.models) {
|
|
for (const [modelId, model] of Object.entries(data["zai-coding-plan"].models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
const supportsImage = m.modalities?.input?.includes("image");
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider: "zai",
|
|
baseUrl: "https://api.z.ai/api/coding/paas/v4",
|
|
reasoning: m.reasoning === true,
|
|
input: supportsImage ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
compat: {
|
|
supportsDeveloperRole: false,
|
|
thinkingFormat: "zai",
|
|
...(!ZAI_TOOL_STREAM_UNSUPPORTED_MODELS.has(modelId) ? { zaiToolStream: true } : {}),
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Mistral models
|
|
if (data.mistral?.models) {
|
|
for (const [modelId, model] of Object.entries(data.mistral.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "mistral-conversations",
|
|
provider: "mistral",
|
|
baseUrl: "https://api.mistral.ai",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Hugging Face models
|
|
if (data.huggingface?.models) {
|
|
for (const [modelId, model] of Object.entries(data.huggingface.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider: "huggingface",
|
|
baseUrl: "https://router.huggingface.co/v1",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
compat: {
|
|
supportsDeveloperRole: false,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Fireworks models
|
|
if (data["fireworks-ai"]?.models) {
|
|
for (const [modelId, model] of Object.entries(data["fireworks-ai"].models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "anthropic-messages",
|
|
provider: "fireworks",
|
|
// Fireworks Anthropic-compatible API - SDK appends /v1/messages
|
|
baseUrl: "https://api.fireworks.ai/inference",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
// Fireworks prompt caching uses automatic prefix matching + session affinity.
|
|
// x-session-affinity routes requests to the same replica for cache hits.
|
|
// cache_control on tools and eager_input_streaming are not supported.
|
|
// See: https://docs.fireworks.ai/tools-sdks/anthropic-compatibility
|
|
compat: {
|
|
sendSessionAffinityHeaders: true,
|
|
supportsEagerToolInputStreaming: false,
|
|
supportsCacheControlOnTools: false,
|
|
supportsLongCacheRetention: false,
|
|
},
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Together AI models
|
|
const togetherProvider = data.together ?? data.togetherai ?? data["together-ai"];
|
|
if (togetherProvider?.models) {
|
|
for (const [modelId, model] of Object.entries(togetherProvider.models)) {
|
|
const m = model as ModelsDevModel & { status?: string };
|
|
if (m.tool_call !== true) continue;
|
|
if (m.status === "deprecated") continue;
|
|
|
|
const reasoning = m.reasoning === true;
|
|
const thinkingLevelMap = getTogetherThinkingLevelMap(modelId, reasoning);
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider: "together",
|
|
baseUrl: TOGETHER_BASE_URL,
|
|
reasoning,
|
|
...(thinkingLevelMap ? { thinkingLevelMap } : {}),
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
compat: getTogetherCompat(modelId, reasoning),
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process OpenCode models (Zen and Go)
|
|
// API mapping based on provider.npm field:
|
|
// - @ai-sdk/openai → openai-responses
|
|
// - @ai-sdk/anthropic → anthropic-messages
|
|
// - @ai-sdk/google → google-generative-ai
|
|
// - null/undefined/@ai-sdk/openai-compatible → openai-completions
|
|
const opencodeVariants = [
|
|
{ key: "opencode", provider: "opencode", basePath: "https://opencode.ai/zen" },
|
|
{ key: "opencode-go", provider: "opencode-go", basePath: "https://opencode.ai/zen/go" },
|
|
] as const;
|
|
|
|
for (const variant of opencodeVariants) {
|
|
if (!data[variant.key]?.models) continue;
|
|
|
|
for (const [modelId, model] of Object.entries(data[variant.key].models)) {
|
|
const m = model as ModelsDevModel & { status?: string };
|
|
if (m.tool_call !== true) continue;
|
|
if (m.status === "deprecated") continue;
|
|
|
|
const npm = m.provider?.npm;
|
|
let api: Api;
|
|
let baseUrl: string;
|
|
let compat: OpenAICompletionsCompat | undefined;
|
|
|
|
if (npm === "@ai-sdk/openai") {
|
|
api = "openai-responses";
|
|
baseUrl = `${variant.basePath}/v1`;
|
|
} else if (npm === "@ai-sdk/anthropic") {
|
|
api = "anthropic-messages";
|
|
// Anthropic SDK appends /v1/messages to baseURL
|
|
baseUrl = variant.basePath;
|
|
} else if (npm === "@ai-sdk/google") {
|
|
api = "google-generative-ai";
|
|
baseUrl = `${variant.basePath}/v1`;
|
|
} else if (npm === "@ai-sdk/alibaba") {
|
|
api = "openai-completions";
|
|
baseUrl = `${variant.basePath}/v1`;
|
|
compat = { cacheControlFormat: "anthropic" };
|
|
} else {
|
|
// null, undefined, or @ai-sdk/openai-compatible
|
|
api = "openai-completions";
|
|
baseUrl = `${variant.basePath}/v1`;
|
|
}
|
|
|
|
// Fix known mismatches between models.dev npm data and actual
|
|
// OpenCode Go endpoint behaviour. models.dev reports these models
|
|
// as @ai-sdk/anthropic, but the OpenCode Go endpoints either don't
|
|
// accept Anthropic SDK auth (MiniMax M2.7) or are served through
|
|
// the OpenAI-compatible /v1/chat/completions path (Qwen 3.5/3.6).
|
|
// Switch them to openai-completions so requests use Bearer auth
|
|
// and the standard /v1/chat/completions endpoint.
|
|
if (variant.provider === "opencode-go") {
|
|
if (modelId === "minimax-m2.7") {
|
|
api = "openai-completions";
|
|
baseUrl = `${variant.basePath}/v1`;
|
|
}
|
|
if (modelId === "qwen3.5-plus" || modelId === "qwen3.6-plus") {
|
|
api = "openai-completions";
|
|
baseUrl = `${variant.basePath}/v1`;
|
|
// Qwen/DashScope uses enable_thinking at the top level.
|
|
compat = { ...(compat ?? {}), thinkingFormat: "qwen" };
|
|
}
|
|
}
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api,
|
|
provider: variant.provider,
|
|
baseUrl,
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
...(compat ? { compat } : {}),
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process GitHub Copilot models
|
|
if (data["github-copilot"]?.models) {
|
|
for (const [modelId, model] of Object.entries(data["github-copilot"].models)) {
|
|
const m = model as ModelsDevModel & { status?: string };
|
|
if (m.tool_call !== true) continue;
|
|
if (m.status === "deprecated") continue;
|
|
|
|
// Claude 4.x models route to Anthropic Messages API
|
|
const isCopilotClaude4 = /^claude-(haiku|sonnet|opus)-4([.\-]|$)/.test(modelId);
|
|
// gpt-5 models require responses API, others use completions
|
|
const needsResponsesApi = modelId.startsWith("gpt-5") || modelId.startsWith("oswe");
|
|
|
|
const api: Api = isCopilotClaude4
|
|
? "anthropic-messages"
|
|
: needsResponsesApi
|
|
? "openai-responses"
|
|
: "openai-completions";
|
|
|
|
const anthropicCompat =
|
|
api === "anthropic-messages" ? getAnthropicMessagesCompat("github-copilot", modelId) : undefined;
|
|
|
|
const copilotModel: Model<any> = {
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api,
|
|
provider: "github-copilot",
|
|
baseUrl: "https://api.individual.githubcopilot.com",
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 128000,
|
|
maxTokens: m.limit?.output || 8192,
|
|
headers: { ...COPILOT_STATIC_HEADERS },
|
|
...(anthropicCompat ? { compat: anthropicCompat } : {}),
|
|
// compat only applies to openai-completions
|
|
...(api === "openai-completions" ? {
|
|
compat: {
|
|
supportsStore: false,
|
|
supportsDeveloperRole: false,
|
|
supportsReasoningEffort: false,
|
|
},
|
|
} : {}),
|
|
};
|
|
|
|
models.push(copilotModel);
|
|
}
|
|
}
|
|
|
|
// Process MiniMax models
|
|
const minimaxVariants = [
|
|
{ key: "minimax", provider: "minimax", baseUrl: "https://api.minimax.io/anthropic" },
|
|
{ key: "minimax-cn", provider: "minimax-cn", baseUrl: "https://api.minimaxi.com/anthropic" },
|
|
] as const;
|
|
|
|
for (const { key, provider, baseUrl } of minimaxVariants) {
|
|
if (data[key]?.models) {
|
|
for (const [modelId, model] of Object.entries(data[key].models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "anthropic-messages",
|
|
provider,
|
|
// MiniMax's Anthropic-compatible API - SDK appends /v1/messages
|
|
baseUrl,
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process Kimi For Coding models
|
|
if (data["kimi-for-coding"]?.models) {
|
|
const kimiModels = data["kimi-for-coding"].models as Record<string, ModelsDevModel>;
|
|
const hasCanonicalModel = Object.prototype.hasOwnProperty.call(kimiModels, "kimi-for-coding");
|
|
|
|
const kimiAliases = new Set(["k2p5", "k2p6"]);
|
|
|
|
for (const [modelId, model] of Object.entries(kimiModels)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
// models.dev may expose versioned aliases (e.g. k2p5/k2p6).
|
|
// Normalize aliases to the canonical model id and drop duplicates when canonical exists.
|
|
if (kimiAliases.has(modelId) && hasCanonicalModel) continue;
|
|
|
|
const normalizedId = kimiAliases.has(modelId) ? "kimi-for-coding" : modelId;
|
|
const normalizedName = kimiAliases.has(modelId) ? "Kimi For Coding" : m.name || normalizedId;
|
|
|
|
models.push({
|
|
id: normalizedId,
|
|
name: normalizedName,
|
|
api: "anthropic-messages",
|
|
provider: "kimi-coding",
|
|
// Kimi For Coding's Anthropic-compatible API - SDK appends /v1/messages
|
|
baseUrl: "https://api.kimi.com/coding",
|
|
headers: { ...KIMI_STATIC_HEADERS },
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Moonshot AI models
|
|
const moonshotVariants = [
|
|
{ key: "moonshotai", provider: "moonshotai", baseUrl: "https://api.moonshot.ai/v1" },
|
|
{ key: "moonshotai-cn", provider: "moonshotai-cn", baseUrl: "https://api.moonshot.cn/v1" },
|
|
] as const;
|
|
const moonshotCompat: OpenAICompletionsCompat = {
|
|
supportsStore: false,
|
|
supportsDeveloperRole: false,
|
|
supportsReasoningEffort: false,
|
|
maxTokensField: "max_tokens",
|
|
supportsStrictMode: false,
|
|
};
|
|
|
|
for (const { key, provider, baseUrl } of moonshotVariants) {
|
|
if (!data[key]?.models) continue;
|
|
|
|
for (const [modelId, model] of Object.entries(data[key].models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider,
|
|
baseUrl,
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
compat: moonshotCompat,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process Xiaomi MiMo models
|
|
// Built-in `xiaomi` targets the API billing endpoint (single stable URL,
|
|
// keys from platform.xiaomimimo.com). The three `xiaomi-token-plan-*`
|
|
// providers cover prepaid Token Plan endpoints in cn / ams / sgp.
|
|
const xiaomiCompat: OpenAICompletionsCompat = {
|
|
requiresReasoningContentOnAssistantMessages: true,
|
|
thinkingFormat: "deepseek",
|
|
};
|
|
const xiaomiVariants = [
|
|
{ provider: "xiaomi", baseUrl: "https://api.xiaomimimo.com/v1" },
|
|
{ provider: "xiaomi-token-plan-cn", baseUrl: "https://token-plan-cn.xiaomimimo.com/v1" },
|
|
{ provider: "xiaomi-token-plan-ams", baseUrl: "https://token-plan-ams.xiaomimimo.com/v1" },
|
|
{ provider: "xiaomi-token-plan-sgp", baseUrl: "https://token-plan-sgp.xiaomimimo.com/v1" },
|
|
] as const;
|
|
|
|
if (data.xiaomi?.models) {
|
|
for (const { provider, baseUrl } of xiaomiVariants) {
|
|
for (const [modelId, model] of Object.entries(data.xiaomi.models)) {
|
|
const m = model as ModelsDevModel;
|
|
if (m.tool_call !== true) continue;
|
|
|
|
models.push({
|
|
id: modelId,
|
|
name: m.name || modelId,
|
|
api: "openai-completions",
|
|
provider,
|
|
baseUrl,
|
|
compat: xiaomiCompat,
|
|
reasoning: m.reasoning === true,
|
|
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
|
cost: {
|
|
input: m.cost?.input || 0,
|
|
output: m.cost?.output || 0,
|
|
cacheRead: m.cost?.cache_read || 0,
|
|
cacheWrite: m.cost?.cache_write || 0,
|
|
},
|
|
contextWindow: m.limit?.context || 4096,
|
|
maxTokens: m.limit?.output || 4096,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
console.log(`Loaded ${models.length} tool-capable models from models.dev`);
|
|
return models;
|
|
} catch (error) {
|
|
console.error("Failed to load models.dev data:", error);
|
|
return [];
|
|
}
|
|
}
|
|
|
|
/**
 * Builds the static model catalog and writes it to `src/models.generated.ts`.
 *
 * Steps, in order (the order matters because the first entry for a given
 * provider/id pair wins when the catalog is deduplicated):
 *   1. Load models from models.dev, OpenRouter and the AI Gateway catalog.
 *   2. Apply hard-coded metadata overrides for entries known to be wrong upstream.
 *   3. Append models that upstream catalogs do not list yet.
 *   4. Derive Azure OpenAI entries from the OpenAI Responses entries.
 *   5. Apply thinking-level metadata, group by provider, deduplicate by model id.
 *   6. Emit the TypeScript source and print summary statistics.
 */
async function generateModels() {
	// Fetch models from all three sources
	// models.dev: Anthropic, Google, OpenAI, Groq, Cerebras
	// OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI)
	// AI Gateway: OpenAI-compatible catalog with tool-capable models
	const modelsDevModels = await loadModelsDevData();
	const openRouterModels = await fetchOpenRouterModels();
	const aiGatewayModels = await fetchAiGatewayModels();

	// Combine models (models.dev has priority because it comes first)
	const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels].filter(
		(model) =>
			!((model.provider === "opencode" || model.provider === "opencode-go") && model.id === "gpt-5.3-codex-spark"),
	);

	/** True when the combined list already holds a model with this provider/id pair. */
	const hasModel = (provider: string, id: string): boolean =>
		allModels.some((m) => m.provider === provider && m.id === id);

	/** Appends `model` unless an entry with the same provider/id pair already exists. */
	const addIfMissing = (model: Model<any>): void => {
		if (!hasModel(model.provider, model.id)) {
			allModels.push(model);
		}
	};

	// Fix incorrect cache pricing for Claude Opus 4.5 from models.dev
	// models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25)
	const opus45 = allModels.find((m) => m.provider === "anthropic" && m.id === "claude-opus-4-5");
	if (opus45) {
		opus45.cost.cacheRead = 0.5;
		opus45.cost.cacheWrite = 6.25;
	}

	// Temporary overrides until upstream model metadata is corrected.
	for (const candidate of allModels) {
		if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-opus-4-6-v1")) {
			candidate.cost.cacheRead = 0.5;
			candidate.cost.cacheWrite = 6.25;
		}
		if (
			(candidate.provider === "anthropic" ||
				candidate.provider === "opencode" ||
				candidate.provider === "opencode-go" ||
				candidate.provider === "github-copilot") &&
			(candidate.id === "claude-opus-4-6" ||
				candidate.id === "claude-sonnet-4-6" ||
				candidate.id === "claude-opus-4.6" ||
				candidate.id === "claude-sonnet-4.6")
		) {
			candidate.contextWindow = 1000000;
		}

		// OpenCode variants list Claude Sonnet 4/4.5 with 1M context, actual limit is 200K
		if (
			(candidate.provider === "opencode" || candidate.provider === "opencode-go") &&
			(candidate.id === "claude-sonnet-4-5" || candidate.id === "claude-sonnet-4")
		) {
			candidate.contextWindow = 200000;
		}
		if ((candidate.provider === "opencode" || candidate.provider === "opencode-go") && candidate.id === "gpt-5.4") {
			candidate.contextWindow = 272000;
			candidate.maxTokens = 128000;
		}
		if (candidate.provider === "openai" && (candidate.id === "gpt-5.4" || candidate.id === "gpt-5.5")) {
			candidate.contextWindow = 272000;
			candidate.maxTokens = 128000;
		}
		// Keep selected OpenRouter model metadata stable until upstream settles.
		if (candidate.provider === "openrouter" && candidate.id === "moonshotai/kimi-k2.5") {
			candidate.cost.input = 0.41;
			candidate.cost.output = 2.06;
			candidate.cost.cacheRead = 0.07;
			candidate.maxTokens = 4096;
		}
		if (candidate.provider === "openrouter" && candidate.id === "z-ai/glm-5") {
			candidate.cost.input = 0.6;
			candidate.cost.output = 1.9;
			candidate.cost.cacheRead = 0.119;
		}
	}

	// Add missing EU Opus 4.6 profile.
	// Guarded explicitly so getBedrockBaseUrl is only called when the entry is actually added.
	if (!hasModel("amazon-bedrock", "eu.anthropic.claude-opus-4-6-v1")) {
		allModels.push({
			id: "eu.anthropic.claude-opus-4-6-v1",
			name: "Claude Opus 4.6 (EU)",
			api: "bedrock-converse-stream",
			provider: "amazon-bedrock",
			baseUrl: getBedrockBaseUrl("eu.anthropic.claude-opus-4-6-v1"),
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
			contextWindow: 200000,
			maxTokens: 128000,
		});
	}

	// Add missing Claude Opus 4.6
	addIfMissing({
		id: "claude-opus-4-6",
		name: "Claude Opus 4.6",
		api: "anthropic-messages",
		baseUrl: "https://api.anthropic.com",
		provider: "anthropic",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
		contextWindow: 1000000,
		maxTokens: 128000,
	});

	// Add missing Claude Opus 4.7
	addIfMissing({
		id: "claude-opus-4-7",
		name: "Claude Opus 4.7",
		api: "anthropic-messages",
		baseUrl: "https://api.anthropic.com",
		provider: "anthropic",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
		contextWindow: 1000000,
		maxTokens: 128000,
	});

	// Add missing Claude Sonnet 4.6
	addIfMissing({
		id: "claude-sonnet-4-6",
		name: "Claude Sonnet 4.6",
		api: "anthropic-messages",
		baseUrl: "https://api.anthropic.com",
		provider: "anthropic",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
		contextWindow: 1000000,
		maxTokens: 64000,
	});

	// Add missing Gemini 3.1 Flash Lite Preview until models.dev includes it.
	addIfMissing({
		id: "gemini-3.1-flash-lite-preview",
		name: "Gemini 3.1 Flash Lite Preview",
		api: "google-generative-ai",
		baseUrl: "https://generativelanguage.googleapis.com/v1beta",
		provider: "google",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 1048576,
		maxTokens: 65536,
	});

	// Add missing gpt models
	addIfMissing({
		id: "gpt-5-chat-latest",
		name: "GPT-5 Chat Latest",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		provider: "openai",
		reasoning: false,
		input: ["text", "image"],
		cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
		contextWindow: 128000,
		maxTokens: 16384,
	});

	addIfMissing({
		id: "gpt-5.1-codex",
		name: "GPT-5.1 Codex",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		provider: "openai",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 1.25, output: 5, cacheRead: 0.125, cacheWrite: 1.25 },
		contextWindow: 400000,
		maxTokens: 128000,
	});

	addIfMissing({
		id: "gpt-5.1-codex-max",
		name: "GPT-5.1 Codex Max",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		provider: "openai",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
		contextWindow: 400000,
		maxTokens: 128000,
	});

	addIfMissing({
		id: "gpt-5.3-codex-spark",
		name: "GPT-5.3 Codex Spark",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		provider: "openai",
		reasoning: true,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 128000,
		maxTokens: 16384,
	});

	// Add missing GitHub Copilot GPT-5.3 models until models.dev includes them.
	// The new entry is cloned from the existing Copilot gpt-5.2-codex entry, if there is one.
	const copilotBaseModel = allModels.find(
		(m) => m.provider === "github-copilot" && m.id === "gpt-5.2-codex",
	);
	if (copilotBaseModel) {
		addIfMissing({
			...copilotBaseModel,
			id: "gpt-5.3-codex",
			name: "GPT-5.3 Codex",
		});
	}

	addIfMissing({
		id: "gpt-5.4",
		name: "GPT-5.4",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		provider: "openai",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 },
		contextWindow: 272000,
		maxTokens: 128000,
	});

	const deepseekCompat: OpenAICompletionsCompat = {
		requiresReasoningContentOnAssistantMessages: true,
		thinkingFormat: "deepseek",
	};
	const deepseekV4Models: Model<"openai-completions">[] = [
		{
			id: "deepseek-v4-flash",
			name: "DeepSeek V4 Flash",
			api: "openai-completions",
			baseUrl: "https://api.deepseek.com",
			provider: "deepseek",
			reasoning: true,
			input: ["text"],
			cost: { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0 },
			contextWindow: 1000000,
			maxTokens: 384000,
			compat: deepseekCompat,
		},
		{
			id: "deepseek-v4-pro",
			name: "DeepSeek V4 Pro",
			api: "openai-completions",
			baseUrl: "https://api.deepseek.com",
			provider: "deepseek",
			reasoning: true,
			input: ["text"],
			cost: { input: 0.435, output: 0.87, cacheRead: 0.003625, cacheWrite: 0 },
			contextWindow: 1000000,
			maxTokens: 384000,
			compat: deepseekCompat,
		},
	];
	// Pushed unconditionally: if an earlier source already listed these ids, the
	// earlier entry wins when the catalog is deduplicated below.
	allModels.push(...deepseekV4Models);

	// Every openai-completions model whose id mentions deepseek-v4 gets the DeepSeek
	// compat flags merged over its existing compat, plus the V4 thinking level map.
	for (const candidate of allModels) {
		if (candidate.api === "openai-completions" && candidate.id.includes("deepseek-v4")) {
			candidate.compat = {
				...candidate.compat,
				...(candidate.provider === "openrouter"
					? {
							requiresReasoningContentOnAssistantMessages:
								deepseekCompat.requiresReasoningContentOnAssistantMessages,
							thinkingFormat: deepseekCompat.thinkingFormat,
						}
					: deepseekCompat),
			};
			mergeThinkingLevelMap(candidate, DEEPSEEK_V4_THINKING_LEVEL_MAP);
		}
	}

	const minimaxDirectSupportedIds = new Set(["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]);

	// Override limits for the supported direct MiniMax models.
	for (const candidate of allModels) {
		if (
			(candidate.provider === "minimax" || candidate.provider === "minimax-cn") &&
			minimaxDirectSupportedIds.has(candidate.id)
		) {
			candidate.contextWindow = 204800;
			candidate.maxTokens = 131072;
		}
	}

	// Drop every other direct MiniMax model. Iterate backwards so splice() does not
	// shift entries that have not been visited yet.
	for (let i = allModels.length - 1; i >= 0; i--) {
		const candidate = allModels[i];
		if (
			(candidate.provider === "minimax" || candidate.provider === "minimax-cn") &&
			!minimaxDirectSupportedIds.has(candidate.id)
		) {
			allModels.splice(i, 1);
		}
	}

	// OpenAI Codex (ChatGPT OAuth) models
	// NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases.
	// Context window is based on observed server limits (400s above ~272k), not marketing numbers.
	const CODEX_BASE_URL = "https://chatgpt.com/backend-api";
	const CODEX_CONTEXT = 272000;
	const CODEX_MAX_TOKENS = 128000;
	const codexModels: Model<"openai-codex-responses">[] = [
		{
			id: "gpt-5.2",
			name: "GPT-5.2",
			api: "openai-codex-responses",
			provider: "openai-codex",
			baseUrl: CODEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
			contextWindow: CODEX_CONTEXT,
			maxTokens: CODEX_MAX_TOKENS,
		},
		{
			id: "gpt-5.3-codex",
			name: "GPT-5.3 Codex",
			api: "openai-codex-responses",
			provider: "openai-codex",
			baseUrl: CODEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
			contextWindow: CODEX_CONTEXT,
			maxTokens: CODEX_MAX_TOKENS,
		},
		{
			id: "gpt-5.3-codex-spark",
			name: "GPT-5.3 Codex Spark",
			api: "openai-codex-responses",
			provider: "openai-codex",
			baseUrl: CODEX_BASE_URL,
			reasoning: true,
			input: ["text"],
			cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
			contextWindow: CODEX_CONTEXT,
			maxTokens: CODEX_MAX_TOKENS,
		},
		{
			id: "gpt-5.4",
			name: "GPT-5.4",
			api: "openai-codex-responses",
			provider: "openai-codex",
			baseUrl: CODEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 },
			contextWindow: CODEX_CONTEXT,
			maxTokens: CODEX_MAX_TOKENS,
		},
		{
			id: "gpt-5.4-mini",
			name: "GPT-5.4 mini",
			api: "openai-codex-responses",
			provider: "openai-codex",
			baseUrl: CODEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 },
			contextWindow: CODEX_CONTEXT,
			maxTokens: CODEX_MAX_TOKENS,
		},
		{
			id: "gpt-5.5",
			name: "GPT-5.5",
			api: "openai-codex-responses",
			provider: "openai-codex",
			baseUrl: CODEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 },
			contextWindow: CODEX_CONTEXT,
			maxTokens: CODEX_MAX_TOKENS,
		},
	];
	allModels.push(...codexModels);

	// Add missing Grok models
	const missingGrokModels: Model<"openai-completions">[] = [
		{
			id: "grok-3",
			name: "Grok 3",
			api: "openai-completions",
			baseUrl: "https://api.x.ai/v1",
			provider: "xai",
			reasoning: false,
			input: ["text"],
			cost: { input: 3, output: 15, cacheRead: 0.75, cacheWrite: 0 },
			contextWindow: 131072,
			maxTokens: 8192,
		},
		{
			id: "grok-3-fast",
			name: "Grok 3 Fast",
			api: "openai-completions",
			baseUrl: "https://api.x.ai/v1",
			provider: "xai",
			reasoning: false,
			input: ["text"],
			cost: { input: 5, output: 25, cacheRead: 1.25, cacheWrite: 0 },
			contextWindow: 131072,
			maxTokens: 8192,
		},
		{
			id: "grok-code-fast-1",
			name: "Grok Code Fast 1",
			api: "openai-completions",
			baseUrl: "https://api.x.ai/v1",
			provider: "xai",
			reasoning: false,
			input: ["text"],
			cost: { input: 0.2, output: 1.5, cacheRead: 0.02, cacheWrite: 0 },
			contextWindow: 32768,
			maxTokens: 8192,
		},
	];
	for (const model of missingGrokModels) {
		addIfMissing(model);
	}

	// Add missing Mistral Medium 3.5 model until models.dev includes it
	addIfMissing({
		id: "mistral-medium-3.5",
		name: "Mistral Medium 3.5",
		api: "mistral-conversations",
		provider: "mistral",
		baseUrl: "https://api.mistral.ai",
		reasoning: true,
		input: ["text", "image"],
		cost: { input: 1.5, output: 7.5, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 262144, // 256k tokens
		maxTokens: 262144,
	});

	// Add "auto" alias for openrouter/auto
	addIfMissing({
		id: "auto",
		name: "Auto",
		api: "openai-completions",
		provider: "openrouter",
		baseUrl: "https://openrouter.ai/api/v1",
		reasoning: true,
		input: ["text", "image"],
		// We don't know the costs because OpenRouter auto routes to different models
		// and then charges you for the underlying used model.
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 2000000,
		maxTokens: 30000,
	});

	const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com";
	const vertexModels: Model<"google-vertex">[] = [
		{
			id: "gemini-3-pro-preview",
			name: "Gemini 3 Pro Preview (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
			contextWindow: 1000000,
			maxTokens: 64000,
		},
		{
			id: "gemini-3.1-pro-preview",
			name: "Gemini 3.1 Pro Preview (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-3.1-pro-preview-customtools",
			name: "Gemini 3.1 Pro Preview Custom Tools (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-3-flash-preview",
			name: "Gemini 3 Flash Preview (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-2.0-flash",
			name: "Gemini 2.0 Flash (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: false,
			input: ["text", "image"],
			cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 8192,
		},
		{
			id: "gemini-2.0-flash-lite",
			name: "Gemini 2.0 Flash Lite (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-2.5-pro",
			name: "Gemini 2.5 Pro (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-2.5-flash",
			name: "Gemini 2.5 Flash (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-2.5-flash-lite-preview-09-2025",
			name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-2.5-flash-lite",
			name: "Gemini 2.5 Flash Lite (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
			contextWindow: 1048576,
			maxTokens: 65536,
		},
		{
			id: "gemini-1.5-pro",
			name: "Gemini 1.5 Pro (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: false,
			input: ["text", "image"],
			cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 },
			contextWindow: 1000000,
			maxTokens: 8192,
		},
		{
			id: "gemini-1.5-flash",
			name: "Gemini 1.5 Flash (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: false,
			input: ["text", "image"],
			cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
			contextWindow: 1000000,
			maxTokens: 8192,
		},
		{
			id: "gemini-1.5-flash-8b",
			name: "Gemini 1.5 Flash-8B (Vertex)",
			api: "google-vertex",
			provider: "google-vertex",
			baseUrl: VERTEX_BASE_URL,
			reasoning: false,
			input: ["text", "image"],
			cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 },
			contextWindow: 1000000,
			maxTokens: 8192,
		},
	];
	allModels.push(...vertexModels);

	// Mirror every OpenAI Responses model as an Azure OpenAI entry with an empty base URL.
	const azureOpenAiModels: Model<Api>[] = allModels
		.filter((model) => model.provider === "openai" && model.api === "openai-responses")
		.map((model) => ({
			...model,
			api: "azure-openai-responses",
			provider: "azure-openai-responses",
			baseUrl: "",
		}));
	allModels.push(...azureOpenAiModels);

	for (const model of allModels) {
		applyThinkingLevelMetadata(model);
	}

	// Group by provider and deduplicate by model ID
	const providers: Record<string, Record<string, Model<any>>> = {};
	for (const model of allModels) {
		if (!providers[model.provider]) {
			providers[model.provider] = {};
		}
		// Use model ID as key to automatically deduplicate
		// Only add if not already present (models.dev takes priority over OpenRouter)
		if (!providers[model.provider][model.id]) {
			providers[model.provider][model.id] = model;
		}
	}

	// Generate TypeScript file
	let output = [
		"// This file is auto-generated by scripts/generate-models.ts",
		"// Do not edit manually - run 'npm run generate-models' to update",
		"",
		'import type { Model } from "./types.ts";',
		"",
		"export const MODELS = {",
		"",
	].join("\n");

	// Generate provider sections (sorted for deterministic output).
	// Every string is emitted through JSON.stringify so that quotes or backslashes in
	// upstream metadata cannot produce an invalid generated file.
	const sortedProviderIds = Object.keys(providers).sort();
	for (const providerId of sortedProviderIds) {
		const models = providers[providerId];
		output += `\t${JSON.stringify(providerId)}: {\n`;

		const sortedModelIds = Object.keys(models).sort();
		for (const modelId of sortedModelIds) {
			const model = models[modelId];
			output += `\t\t${JSON.stringify(model.id)}: {\n`;
			output += `\t\t\tid: ${JSON.stringify(model.id)},\n`;
			output += `\t\t\tname: ${JSON.stringify(model.name)},\n`;
			output += `\t\t\tapi: ${JSON.stringify(model.api)},\n`;
			output += `\t\t\tprovider: ${JSON.stringify(model.provider)},\n`;
			if (model.baseUrl !== undefined) {
				output += `\t\t\tbaseUrl: ${JSON.stringify(model.baseUrl)},\n`;
			}
			if (model.headers) {
				output += `\t\t\theaders: ${JSON.stringify(model.headers)},\n`;
			}
			if (model.compat) {
				output += `\t\t\tcompat: ${JSON.stringify(model.compat)},\n`;
			}
			output += `\t\t\treasoning: ${model.reasoning},\n`;
			if (model.thinkingLevelMap) {
				output += `\t\t\tthinkingLevelMap: ${JSON.stringify(model.thinkingLevelMap)},\n`;
			}
			output += `\t\t\tinput: [${model.input.map((i) => JSON.stringify(i)).join(", ")}],\n`;
			output += `\t\t\tcost: {\n`;
			output += `\t\t\t\tinput: ${model.cost.input},\n`;
			output += `\t\t\t\toutput: ${model.cost.output},\n`;
			output += `\t\t\t\tcacheRead: ${model.cost.cacheRead},\n`;
			output += `\t\t\t\tcacheWrite: ${model.cost.cacheWrite},\n`;
			output += `\t\t\t},\n`;
			output += `\t\t\tcontextWindow: ${model.contextWindow},\n`;
			output += `\t\t\tmaxTokens: ${model.maxTokens},\n`;
			output += `\t\t} satisfies Model<${JSON.stringify(model.api)}>,\n`;
		}

		output += `\t},\n`;
	}

	output += "} as const;\n";

	// Write file
	writeFileSync(join(packageRoot, "src/models.generated.ts"), output);
	console.log("Generated src/models.generated.ts");

	// Print statistics. Count the deduplicated catalog (what was actually written),
	// so the totals agree with the per-provider counts below.
	let totalModels = 0;
	let reasoningModels = 0;
	for (const models of Object.values(providers)) {
		for (const model of Object.values(models)) {
			totalModels++;
			if (model.reasoning) {
				reasoningModels++;
			}
		}
	}

	console.log(`\nModel Statistics:`);
	console.log(` Total tool-capable models: ${totalModels}`);
	console.log(` Reasoning-capable models: ${reasoningModels}`);

	for (const [provider, models] of Object.entries(providers)) {
		console.log(` ${provider}: ${Object.keys(models).length} models`);
	}
}
|
|
|
|
// Run the generator. On failure, log the error and mark the process as failed so
// that calling scripts and CI do not treat a broken generation as success.
generateModels().catch((error: unknown) => {
	console.error(error);
	process.exitCode = 1;
});
|