mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-01 22:10:23 +00:00
zen: tpm routing
This commit is contained in:
parent
3e8abac625
commit
f74a255ca9
5 changed files with 2746 additions and 29 deletions
|
|
@ -448,31 +448,40 @@ export async function handler(
|
||||||
return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
|
return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filter out TPM limited providers
|
||||||
|
const allProviders = modelInfo.providers.filter((provider) => {
|
||||||
|
if (!provider.tpmLimit) return true
|
||||||
|
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
|
||||||
|
return usage < provider.tpmLimit * 1_000_000
|
||||||
|
})
|
||||||
|
|
||||||
// Always use the same provider for the same session
|
// Always use the same provider for the same session
|
||||||
if (stickyProvider) {
|
if (stickyProvider) {
|
||||||
const provider = modelInfo.providers.find((provider) => provider.id === stickyProvider)
|
const provider = allProviders.find((provider) => provider.id === stickyProvider)
|
||||||
if (provider) return provider
|
if (provider) return provider
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trialProviders) {
|
if (trialProviders) {
|
||||||
const trialProvider = trialProviders[Math.floor(Math.random() * trialProviders.length)]
|
const trialProvider = trialProviders[Math.floor(Math.random() * trialProviders.length)]
|
||||||
const provider = modelInfo.providers.find((provider) => provider.id === trialProvider)
|
const provider = allProviders.find((provider) => provider.id === trialProvider)
|
||||||
if (provider) return provider
|
if (provider) return provider
|
||||||
}
|
}
|
||||||
|
|
||||||
if (retry.retryCount !== MAX_FAILOVER_RETRIES) {
|
if (retry.retryCount !== MAX_FAILOVER_RETRIES) {
|
||||||
const allProviders = modelInfo.providers
|
let topPriority = Infinity
|
||||||
|
const providers = allProviders
|
||||||
.filter((provider) => !provider.disabled)
|
.filter((provider) => !provider.disabled)
|
||||||
.filter((provider) => provider.weight !== 0)
|
.filter((provider) => provider.weight !== 0)
|
||||||
.filter((provider) => !retry.excludeProviders.includes(provider.id))
|
.filter((provider) => !retry.excludeProviders.includes(provider.id))
|
||||||
.filter((provider) => {
|
.filter((provider) => {
|
||||||
if (!provider.tpmLimit) return true
|
if (!provider.tpmLimit) return true
|
||||||
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
|
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
|
||||||
return usage < provider.tpmLimit * 1_000_000
|
return usage < provider.tpmLimit * 1_000_000 * 0.8
|
||||||
|
})
|
||||||
|
.map((provider) => {
|
||||||
|
topPriority = Math.min(topPriority, provider.priority)
|
||||||
|
return provider
|
||||||
})
|
})
|
||||||
|
|
||||||
const topPriority = Math.min(...allProviders.map((p) => p.priority))
|
|
||||||
const providers = allProviders
|
|
||||||
.filter((p) => p.priority <= topPriority)
|
.filter((p) => p.priority <= topPriority)
|
||||||
.flatMap((provider) => Array<typeof provider>(provider.weight).fill(provider))
|
.flatMap((provider) => Array<typeof provider>(provider.weight).fill(provider))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,28 +1,25 @@
|
||||||
import { and, Database, eq, inArray, sql } from "@opencode-ai/console-core/drizzle/index.js"
|
import { and, Database, eq, inArray, sql } from "@opencode-ai/console-core/drizzle/index.js"
|
||||||
import { ModelTpmLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
|
import { ModelTpmRateLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
|
||||||
import { UsageInfo } from "./provider/provider"
|
import { UsageInfo } from "./provider/provider"
|
||||||
|
|
||||||
export function createModelTpmLimiter(providers: { id: string; model: string; tpmLimit?: number }[]) {
|
export function createModelTpmLimiter(providers: { id: string; model: string; tpmLimit?: number }[]) {
|
||||||
const ids = providers.filter((p) => p.tpmLimit).map((p) => `${p.id}/${p.model}`)
|
const ids = providers.filter((p) => p.tpmLimit).map((p) => `${p.id}/${p.model}`)
|
||||||
if (ids.length === 0) return
|
if (ids.length === 0) return
|
||||||
|
|
||||||
const yyyyMMddHHmm = new Date(Date.now())
|
const yyyyMMddHHmm = parseInt(
|
||||||
|
new Date(Date.now())
|
||||||
.toISOString()
|
.toISOString()
|
||||||
.replace(/[^0-9]/g, "")
|
.replace(/[^0-9]/g, "")
|
||||||
.substring(0, 12)
|
.substring(0, 12),
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
check: async () => {
|
check: async () => {
|
||||||
const data = await Database.use((tx) =>
|
const data = await Database.use((tx) =>
|
||||||
tx
|
tx
|
||||||
.select()
|
.select()
|
||||||
.from(ModelTpmLimitTable)
|
.from(ModelTpmRateLimitTable)
|
||||||
.where(
|
.where(and(inArray(ModelTpmRateLimitTable.id, ids), eq(ModelTpmRateLimitTable.interval, yyyyMMddHHmm))),
|
||||||
inArray(
|
|
||||||
ModelTpmLimitTable.id,
|
|
||||||
ids.map((id) => formatId(id, yyyyMMddHHmm)),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// convert to map of model to count
|
// convert to map of model to count
|
||||||
|
|
@ -41,14 +38,10 @@ export function createModelTpmLimiter(providers: { id: string; model: string; tp
|
||||||
if (usage <= 0) return
|
if (usage <= 0) return
|
||||||
await Database.use((tx) =>
|
await Database.use((tx) =>
|
||||||
tx
|
tx
|
||||||
.insert(ModelTpmLimitTable)
|
.insert(ModelTpmRateLimitTable)
|
||||||
.values({ id: formatId(id, yyyyMMddHHmm), count: usage })
|
.values({ id, interval: yyyyMMddHHmm, count: usage })
|
||||||
.onDuplicateKeyUpdate({ set: { count: sql`${ModelTpmLimitTable.count} + ${usage}` } }),
|
.onDuplicateKeyUpdate({ set: { count: sql`${ModelTpmRateLimitTable.count} + ${usage}` } }),
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
function formatId(id: string, yyyyMMddHHmm: string) {
|
|
||||||
return `${id.substring(0, 200)}/${yyyyMMddHHmm}`
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
-- Per-interval token usage counters for TPM-limited provider/model pairs.
--   `id`       : "<provider id>/<model>" key built by the TPM limiter.
--   `interval` : UTC minute bucket, stored as the number yyyyMMddHHmm.
--   `count`    : tokens accumulated for that id within that interval.
--
-- The primary key covers (`id`, `interval`) because the limiter upserts one row
-- per id per interval via INSERT ... ON DUPLICATE KEY UPDATE and reads rows back
-- filtered by id AND interval. With `id` alone as the key, a write in a new
-- interval would collide with the previous interval's row and add to its count
-- instead of creating a fresh row, so lookups for the current interval would
-- never find any usage.
CREATE TABLE `model_tpm_rate_limit` (
	`id` varchar(255) NOT NULL,
	`interval` bigint NOT NULL,
	`count` int NOT NULL,
	CONSTRAINT `model_tpm_rate_limit_id_interval_pk` PRIMARY KEY(`id`,`interval`)
);
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,4 +1,4 @@
|
||||||
import { mysqlTable, int, primaryKey, varchar } from "drizzle-orm/mysql-core"
|
import { mysqlTable, int, primaryKey, varchar, bigint } from "drizzle-orm/mysql-core"
|
||||||
import { timestamps } from "../drizzle/types"
|
import { timestamps } from "../drizzle/types"
|
||||||
|
|
||||||
export const IpTable = mysqlTable(
|
export const IpTable = mysqlTable(
|
||||||
|
|
@ -31,10 +31,11 @@ export const KeyRateLimitTable = mysqlTable(
|
||||||
(table) => [primaryKey({ columns: [table.key, table.interval] })],
|
(table) => [primaryKey({ columns: [table.key, table.interval] })],
|
||||||
)
|
)
|
||||||
|
|
||||||
export const ModelTpmLimitTable = mysqlTable(
|
export const ModelTpmRateLimitTable = mysqlTable(
|
||||||
"model_tpm_limit",
|
"model_tpm_rate_limit",
|
||||||
{
|
{
|
||||||
id: varchar("id", { length: 255 }).notNull(),
|
id: varchar("id", { length: 255 }).notNull(),
|
||||||
|
interval: bigint("interval", { mode: "number" }).notNull(),
|
||||||
count: int("count").notNull(),
|
count: int("count").notNull(),
|
||||||
},
|
},
|
||||||
(table) => [primaryKey({ columns: [table.id] })],
|
(table) => [primaryKey({ columns: [table.id] })],
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue