From c34b3f41bdc904bd3e48dff50a3ea3a22ff42c11 Mon Sep 17 00:00:00 2001 From: diegosouzapw Date: Mon, 23 Mar 2026 11:08:14 -0300 Subject: [PATCH] feat: Add requested model to logs, enhance background task detection, and introduce AI SDK compatibility utilities. --- docs/API_REFERENCE.md | 23 +-- docs/USER_GUIDE.md | 16 ++ open-sse/config/providerRegistry.ts | 17 +++ open-sse/handlers/chatCore.ts | 42 +++--- open-sse/handlers/responseTranslator.ts | 71 ++++++--- open-sse/services/accountFallback.ts | 13 ++ open-sse/services/backgroundTaskDetector.ts | 138 +++++++++++------ open-sse/services/sessionManager.ts | 11 ++ open-sse/services/thinkingBudget.ts | 12 +- open-sse/utils/aiSdkCompat.ts | 31 ++++ open-sse/utils/proxyFetch.ts | 17 +++ .../api-manager/ApiManagerPageClient.tsx | 71 +++++++++ .../cli-tools/components/DefaultToolCard.tsx | 2 +- .../ProviderLimits/ProviderLimitCard.tsx | 1 + .../ProviderLimits/QuotaProgressBar.tsx | 10 +- .../components/ProviderLimits/QuotaTable.tsx | 5 +- .../usage/components/ProviderLimits/index.tsx | 29 +++- .../usage/components/ProviderLimits/utils.tsx | 90 +++++------ .../guide-settings/[toolId]/route.ts | 49 +++--- src/app/api/keys/[id]/route.ts | 3 + src/app/api/providers/[id]/test/route.ts | 2 + src/app/api/providers/validate/route.ts | 2 + src/app/api/sessions/route.ts | 4 +- src/lib/providers/validation.ts | 85 ++++++++++- src/lib/usage/callLogs.ts | 6 +- src/shared/components/RequestLoggerV2.tsx | 32 +++- src/shared/components/UsageAnalytics.tsx | 17 ++- src/shared/constants/cliTools.ts | 34 +++++ src/shared/constants/modelSpecs.ts | 111 ++++++++++++++ src/shared/constants/pricing.ts | 131 ++++++++++++---- src/shared/services/cliRuntime.ts | 32 +++- src/shared/services/opencodeConfig.ts | 64 ++++++++ src/shared/utils/apiKeyPolicy.ts | 1 + src/shared/validation/schemas.ts | 3 + src/sse/handlers/chat.ts | 90 ++++++++++- src/sse/services/auth.ts | 130 +++++++++++++++- tests/unit/background-task-detector.test.mjs | 25 +++- 
tests/unit/call-logs-requested-model.test.mjs | 52 +++++++ tests/unit/fixes-p1.test.mjs | 29 +++- .../openai-to-claude-strip-empty.test.mjs | 33 +++- tests/unit/t07-no-log-key-config.test.mjs | 5 + tests/unit/t12-pricing-updates.test.mjs | 34 +++++ tests/unit/t13-stale-quota-display.test.mjs | 31 ++++ tests/unit/t14-proxy-fast-fail.test.mjs | 35 +++++ .../unit/t16-gemini-enum-type-string.test.mjs | 53 +++++++ ...t19-codex-responses-empty-content.test.mjs | 66 ++++++++ tests/unit/t20-t22-provider-headers.test.mjs | 31 ++++ .../unit/t23-t24-fallback-resilience.test.mjs | 141 ++++++++++++++++++ ...vider-validation-modelid-fallback.test.mjs | 116 ++++++++++++++ .../t26-ai-sdk-accept-header-compat.test.mjs | 30 ++++ ...27-github-copilot-response-format.test.mjs | 84 +++++++++++ tests/unit/t28-model-catalog-updates.test.mjs | 41 +++++ .../unit/t29-vertex-sa-json-executor.test.mjs | 71 +++++++++ .../t30-kiro-400-model-unavailable.test.mjs | 29 ++++ .../unit/t31-t33-t34-t38-model-specs.test.mjs | 53 +++++++ ...40-opencode-cli-tools-integration.test.mjs | 67 +++++++++ .../t42-image-size-to-aspect-ratio.test.mjs | 96 ++++++++++++ tests/unit/thinking-budget.test.mjs | 14 +- 58 files changed, 2290 insertions(+), 241 deletions(-) create mode 100644 open-sse/utils/aiSdkCompat.ts create mode 100644 src/shared/constants/modelSpecs.ts create mode 100644 src/shared/services/opencodeConfig.ts create mode 100644 tests/unit/call-logs-requested-model.test.mjs create mode 100644 tests/unit/t12-pricing-updates.test.mjs create mode 100644 tests/unit/t13-stale-quota-display.test.mjs create mode 100644 tests/unit/t14-proxy-fast-fail.test.mjs create mode 100644 tests/unit/t16-gemini-enum-type-string.test.mjs create mode 100644 tests/unit/t19-codex-responses-empty-content.test.mjs create mode 100644 tests/unit/t20-t22-provider-headers.test.mjs create mode 100644 tests/unit/t23-t24-fallback-resilience.test.mjs create mode 100644 tests/unit/t25-provider-validation-modelid-fallback.test.mjs 
create mode 100644 tests/unit/t26-ai-sdk-accept-header-compat.test.mjs create mode 100644 tests/unit/t27-github-copilot-response-format.test.mjs create mode 100644 tests/unit/t28-model-catalog-updates.test.mjs create mode 100644 tests/unit/t29-vertex-sa-json-executor.test.mjs create mode 100644 tests/unit/t30-kiro-400-model-unavailable.test.mjs create mode 100644 tests/unit/t31-t33-t34-t38-model-specs.test.mjs create mode 100644 tests/unit/t40-opencode-cli-tools-integration.test.mjs create mode 100644 tests/unit/t42-image-size-to-aspect-ratio.test.mjs diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md index 39c6f3f5..a0186e80 100644 --- a/docs/API_REFERENCE.md +++ b/docs/API_REFERENCE.md @@ -38,15 +38,20 @@ Content-Type: application/json ### Custom Headers -| Header | Direction | Description | -| ------------------------ | --------- | --------------------------------- | -| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache | -| `X-OmniRoute-Progress` | Request | Set to `true` for progress events | -| `Idempotency-Key` | Request | Dedup key (5s window) | -| `X-Request-Id` | Request | Alternative dedup key | -| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) | -| `X-OmniRoute-Idempotent` | Response | `true` if deduplicated | -| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on | +| Header | Direction | Description | +| ------------------------ | --------- | ------------------------------------------------ | +| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache | +| `X-OmniRoute-Progress` | Request | Set to `true` for progress events | +| `X-Session-Id` | Request | Sticky session key for external session affinity | +| `x_session_id` | Request | Underscore variant also accepted (direct HTTP) | +| `Idempotency-Key` | Request | Dedup key (5s window) | +| `X-Request-Id` | Request | Alternative dedup key | +| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) | +| 
`X-OmniRoute-Idempotent` | Response | `true` if deduplicated | +| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on | +| `X-OmniRoute-Session-Id` | Response | Effective session ID used by OmniRoute | + +> Nginx note: if you rely on underscore headers (for example `x_session_id`), enable `underscores_in_headers on;`. --- diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index 8ae8fe43..6b68c77d 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -578,6 +578,22 @@ Configure via **Dashboard → Settings → Routing**. | **Least Used** | Routes to the account with the oldest `lastUsedAt` timestamp, distributing traffic evenly | | **Cost Optimized** | Routes to the account with the lowest priority value, optimizing for lowest-cost providers | +#### External Sticky Session Header + +For external session affinity (for example, Claude Code/Codex agents behind reverse proxies), send: + +```http +X-Session-Id: your-session-key +``` + +OmniRoute also accepts `x_session_id` and returns the effective session key in `X-OmniRoute-Session-Id`. 
+ +If you use Nginx and send underscore-form headers, enable: + +```nginx +underscores_in_headers on; +``` + #### Wildcard Model Aliases Create wildcard patterns to remap model names: diff --git a/open-sse/config/providerRegistry.ts b/open-sse/config/providerRegistry.ts index aa4b057b..756a5881 100644 --- a/open-sse/config/providerRegistry.ts +++ b/open-sse/config/providerRegistry.ts @@ -192,6 +192,8 @@ export const REGISTRY: Record = { { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" }, { id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" }, { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" }, + { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" }, + { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" }, { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" }, { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" }, @@ -226,6 +228,8 @@ export const REGISTRY: Record = { { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" }, { id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" }, { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" }, + { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" }, + { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" }, { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" }, { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" }, @@ -782,6 +786,10 @@ export const REGISTRY: Record = { "Anthropic-Beta": "claude-code-20250219,interleaved-thinking-2025-05-14", }, models: [ + // T12/T28: MiniMax default upgraded from M2.5 to M2.7 + { id: "minimax-m2.7", name: "MiniMax M2.7" }, + { id: "MiniMax-M2.7", name: "MiniMax M2.7 (Legacy Alias)" }, + { id: "minimax-m2.7-highspeed", name: "MiniMax M2.7 Highspeed" }, { id: "minimax-m2.5", name: "MiniMax M2.5" }, { id: "MiniMax-M2.5", name: "MiniMax M2.5 (Legacy Alias)" }, { id: "MiniMax-M2.1", name: 
"MiniMax M2.1" }, @@ -803,6 +811,9 @@ export const REGISTRY: Record = { }, models: [ // Keep parity with minimax to ensure model discovery works for minimax-cn connections. + { id: "minimax-m2.7", name: "MiniMax M2.7" }, + { id: "MiniMax-M2.7", name: "MiniMax M2.7 (Legacy Alias)" }, + { id: "minimax-m2.7-highspeed", name: "MiniMax M2.7 Highspeed" }, { id: "minimax-m2.5", name: "MiniMax M2.5" }, { id: "MiniMax-M2.5", name: "MiniMax M2.5 (Legacy Alias)" }, { id: "MiniMax-M2.1", name: "MiniMax M2.1" }, @@ -1196,10 +1207,16 @@ export const REGISTRY: Record = { authType: "apikey", authHeader: "bearer", models: [ + { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview (Vertex)" }, + { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview (Vertex)" }, + { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview (Vertex)" }, { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro (Vertex)" }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash (Vertex)" }, { id: "gemini-2.0-flash-thinking-exp", name: "Gemini 2.0 Flash Thinking Exp (Vertex)" }, { id: "gemma-2-27b-it", name: "Gemma 2 27B (Vertex)" }, + { id: "deepseek-v3.2", name: "DeepSeek V3.2 (Vertex Partner)" }, + { id: "qwen3-next-80b", name: "Qwen3 Next 80B (Vertex Partner)" }, + { id: "glm-5", name: "GLM-5 (Vertex Partner)" }, { id: "claude-opus-4-5@20251101", name: "Claude Opus 4.5 (Vertex)" }, { id: "claude-sonnet-4-5@20251101", name: "Claude Sonnet 4.5 (Vertex)" }, ], diff --git a/open-sse/handlers/chatCore.ts b/open-sse/handlers/chatCore.ts index cbd10ab1..d7bcfde9 100644 --- a/open-sse/handlers/chatCore.ts +++ b/open-sse/handlers/chatCore.ts @@ -18,6 +18,7 @@ import { createErrorResult, parseUpstreamError, formatProviderError } from "../u import { HTTP_STATUS } from "../config/constants.ts"; import { classifyProviderError, PROVIDER_ERROR_TYPES } from "../services/errorClassifier.ts"; import { updateProviderConnection } from "@/lib/db/providers"; +import { logAuditEvent } from 
"@/lib/compliance"; import { handleBypassRequest } from "../utils/bypassHandler.ts"; import { saveRequestUsage, @@ -52,7 +53,7 @@ import { createProgressTransform, wantsProgress } from "../utils/progressTracker import { isModelUnavailableError, getNextFamilyFallback } from "../services/modelFamilyFallback.ts"; import { computeRequestHash, deduplicate, shouldDeduplicate } from "../services/requestDedup.ts"; import { - isBackgroundTask, + getBackgroundTaskReason, getDegradedModel, getBackgroundDegradationConfig, } from "../services/backgroundTaskDetector.ts"; @@ -61,6 +62,7 @@ import { isFallbackDecision, EMERGENCY_FALLBACK_CONFIG, } from "../services/emergencyFallback.ts"; +import { resolveStreamFlag, stripMarkdownCodeFence } from "../utils/aiSdkCompat.ts"; export function shouldUseNativeCodexPassthrough({ provider, @@ -234,17 +236,32 @@ export async function handleChatCore({ // ── Background Task Redirection (T41) ── const bgConfig = getBackgroundDegradationConfig(); - if (bgConfig.enabled && isBackgroundTask(body, clientRawRequest?.headers)) { + const backgroundReason = bgConfig.enabled + ? getBackgroundTaskReason(body, clientRawRequest?.headers) + : null; + if (backgroundReason) { const degradedModel = getDegradedModel(model); if (degradedModel !== model) { + const originalModel = model; log?.info?.( "BACKGROUND", - `Background task detected: Redirecting ${model} → ${degradedModel}` + `Background task redirect (${backgroundReason}): ${originalModel} → ${degradedModel}` ); model = degradedModel; if (body && typeof body === "object") { body.model = model; } + + logAuditEvent({ + action: "routing.background_task_redirect", + actor: apiKeyInfo?.name || "system", + target: connectionId || provider || "chat", + details: { + original_model: originalModel, + redirected_to: degradedModel, + reason: backgroundReason, + }, + }); } } @@ -269,12 +286,7 @@ export async function handleChatCore({ ? 
clientRawRequest.headers.get("accept") || clientRawRequest.headers.get("Accept") : (clientRawRequest?.headers || {})["accept"] || (clientRawRequest?.headers || {})["Accept"]; - const clientWantsJson = - typeof acceptHeader === "string" && - acceptHeader.includes("application/json") && - !acceptHeader.includes("text/event-stream"); - - const stream = body.stream === true && !clientWantsJson; + const stream = resolveStreamFlag(body?.stream, acceptHeader); // ── Phase 9.1: Semantic cache check (non-streaming, temp=0 only) ── if (isCacheable(body, clientRawRequest?.headers)) { @@ -1004,14 +1016,10 @@ export async function handleChatCore({ // T26: Strip markdown code blocks if provider format is Claude if (sourceFormat === "claude" && !stream) { - if (translatedResponse?.choices?.[0]?.message?.content) { - const text = translatedResponse.choices[0].message.content; - const codeBlockRegex = - /^```(?:json|javascript|typescript|js|ts)?\s*\n?([\s\S]*?)\n?```\s*$/; - const match = text.trim().match(codeBlockRegex); - if (match) { - translatedResponse.choices[0].message.content = match[1].trim(); - } + if (typeof translatedResponse?.choices?.[0]?.message?.content === "string") { + translatedResponse.choices[0].message.content = stripMarkdownCodeFence( + translatedResponse.choices[0].message.content + ) as string; } } diff --git a/open-sse/handlers/responseTranslator.ts b/open-sse/handlers/responseTranslator.ts index d24213ba..b4fe4472 100644 --- a/open-sse/handlers/responseTranslator.ts +++ b/open-sse/handlers/responseTranslator.ts @@ -20,6 +20,51 @@ function toNumber(value: unknown, fallback = 0): number { return Number.isFinite(parsed) ? 
parsed : fallback; } +function extractMessageOutputText(item: JsonRecord): string { + if (!Array.isArray(item.content)) return ""; + let text = ""; + for (const part of item.content) { + if (!part || typeof part !== "object") continue; + const partObj = toRecord(part); + if (partObj.type === "output_text" && typeof partObj.text === "string") { + text += partObj.text; + } + } + return text; +} + +/** + * T19: Pick the last non-empty message output text from Responses API output. + * Falls back to the last message item even when all message texts are empty. + */ +function findBestMessageText(output: unknown[]): { + text: string; + selectedMessageIndex: number; + messageItems: JsonRecord[]; +} { + const messageItems = output + .map((item) => toRecord(item)) + .filter((item) => item.type === "message" && Array.isArray(item.content)); + + for (let i = messageItems.length - 1; i >= 0; i -= 1) { + const text = extractMessageOutputText(messageItems[i]); + if (text.trim().length > 0) { + return { text, selectedMessageIndex: i, messageItems }; + } + } + + if (messageItems.length > 0) { + const lastIndex = messageItems.length - 1; + return { + text: extractMessageOutputText(messageItems[lastIndex]), + selectedMessageIndex: lastIndex, + messageItems, + }; + } + + return { text: "", selectedMessageIndex: -1, messageItems: [] }; +} + /** * Translate non-streaming response to OpenAI format * Handles different provider response formats (Gemini, Claude, etc.) @@ -44,7 +89,8 @@ export function translateNonStreamingResponse( const output = Array.isArray(response.output) ? response.output : []; const usage = toRecord(response.usage ?? 
responseRoot.usage); - let textContent = ""; + const messageSelection = findBestMessageText(output); + let textContent = messageSelection.text; let reasoningContent = ""; const toolCalls: JsonRecord[] = []; @@ -56,9 +102,7 @@ export function translateNonStreamingResponse( for (const part of itemObj.content) { if (!part || typeof part !== "object") continue; const partObj = toRecord(part); - if (partObj.type === "output_text" && typeof partObj.text === "string") { - textContent += partObj.text; - } else if (partObj.type === "summary_text" && typeof partObj.text === "string") { + if (partObj.type === "summary_text" && typeof partObj.text === "string") { reasoningContent += partObj.text; } } @@ -104,21 +148,14 @@ export function translateNonStreamingResponse( } if (process.env.DEBUG_RESPONSES_SSE_TO_JSON === "true") { - const msgItems = output.filter((i) => toRecord(i).type === "message"); - console.log(`[ResponsesSSE] ${output.length} output items, ${msgItems.length} message items`); - msgItems.forEach((item, idx) => { - const itemObj = toRecord(item); - let textLen = 0; - if (Array.isArray(itemObj.content)) { - for (const part of itemObj.content) { - const partObj = toRecord(part); - if (partObj.type === "output_text" && typeof partObj.text === "string") { - textLen += partObj.text.length; - } - } - } + console.log( + `[ResponsesSSE] ${output.length} output items, ${messageSelection.messageItems.length} message items` + ); + messageSelection.messageItems.forEach((item, idx) => { + const textLen = extractMessageOutputText(item).length; console.log(` [${idx}] text length: ${textLen}`); }); + console.log(` → Selected message index: ${messageSelection.selectedMessageIndex}`); console.log(` → Final text content length: ${textContent.length}`); } diff --git a/open-sse/services/accountFallback.ts b/open-sse/services/accountFallback.ts index 44f7ed12..066c072c 100644 --- a/open-sse/services/accountFallback.ts +++ b/open-sse/services/accountFallback.ts @@ -508,6 +508,19 @@ 
export function checkFallbackError( HTTP_STATUS.GATEWAY_TIMEOUT, ]; if (transientStatuses.includes(status)) { + const resetTime = parseResetFromHeaders(headers, errorStr); + if (resetTime) { + const waitMs = resetTime - Date.now(); + if (waitMs > 60_000) { + return { + shouldFallback: true, + cooldownMs: waitMs, + newBackoffLevel: 0, + reason: RateLimitReason.SERVER_ERROR, + }; + } + } + const profile = provider ? getProviderProfile(provider) : null; const baseCooldown = profile?.transientCooldown ?? COOLDOWN_MS.transientInitial; const maxLevel = profile?.maxBackoffLevel ?? BACKOFF_CONFIG.maxLevel; diff --git a/open-sse/services/backgroundTaskDetector.ts b/open-sse/services/backgroundTaskDetector.ts index 8d30762e..c767c4c7 100644 --- a/open-sse/services/backgroundTaskDetector.ts +++ b/open-sse/services/backgroundTaskDetector.ts @@ -47,16 +47,16 @@ const DEFAULT_DETECTION_PATTERNS = [ const DEFAULT_DEGRADATION_MAP: Record = { // Premium → Cheap alternatives - "claude-opus-4-6": "gemini-2.5-flash", - "claude-opus-4-6-thinking": "gemini-2.5-flash", - "claude-opus-4-5-20251101": "gemini-2.5-flash", - "claude-sonnet-4-5-20250929": "gemini-2.5-flash", - "claude-sonnet-4-20250514": "gemini-2.5-flash", - "claude-sonnet-4": "gemini-2.5-flash", - "gemini-3.1-pro": "gemini-3.1-flash", - "gemini-3.1-pro-high": "gemini-3.1-flash", + "claude-opus-4-6": "gemini-3-flash", + "claude-opus-4-6-thinking": "gemini-3-flash", + "claude-opus-4-5-20251101": "gemini-3-flash", + "claude-sonnet-4-5-20250929": "gemini-3-flash", + "claude-sonnet-4-20250514": "gemini-3-flash", + "claude-sonnet-4": "gemini-3-flash", + "gemini-3.1-pro": "gemini-3-flash", + "gemini-3.1-pro-high": "gemini-3-flash", "gemini-3-pro-preview": "gemini-3-flash-preview", - "gemini-2.5-pro": "gemini-2.5-flash", + "gemini-2.5-pro": "gemini-3-flash", "gpt-4o": "gpt-4o-mini", "gpt-5": "gpt-5-mini", "gpt-5.1": "gpt-5-mini", @@ -114,12 +114,93 @@ interface BackgroundMessage { interface BackgroundTaskBody { messages?: 
BackgroundMessage[]; input?: BackgroundMessage[]; + max_tokens?: unknown; + max_completion_tokens?: unknown; + max_output_tokens?: unknown; } function toMessageArray(value: unknown): BackgroundMessage[] { return Array.isArray(value) ? (value as BackgroundMessage[]) : []; } +function toFiniteNumber(value: unknown): number | null { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string" && value.trim().length > 0) { + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : null; + } + return null; +} + +function headerValue(headers: Record | null, key: string): string { + if (!headers) return ""; + const value = headers[key] ?? headers[key.toLowerCase()] ?? headers[key.toUpperCase()]; + return typeof value === "string" ? value.trim() : ""; +} + +/** + * Get reason label when request is a background/utility task. + * + * @param {object} body - Request body + * @param {object} [headers] - Request headers (optional) + * @returns {string | null} Reason label or null when not detected + */ +export function getBackgroundTaskReason( + body: BackgroundTaskBody | unknown, + headers: Record | null = null +): string | null { + if (!body || typeof body !== "object") return null; + const typedBody = body as BackgroundTaskBody; + + // 1. Check explicit header + if (headers) { + const taskType = headerValue(headers, "x-task-type"); + const priority = headerValue(headers, "x-request-priority"); + const initiator = headerValue(headers, "x-initiator"); + const explicitValue = [taskType, priority, initiator].find(Boolean); + if (explicitValue && explicitValue.toLowerCase() === "background") { + return "header_background"; + } + } + + // 2. Very low max tokens usually indicates utility/background tasks + const maxTokens = toFiniteNumber( + typedBody.max_tokens ?? typedBody.max_completion_tokens ?? 
typedBody.max_output_tokens + ); + if (maxTokens !== null && maxTokens > 0 && maxTokens < 50) { + return "low_max_tokens"; + } + + // 3. Check system prompt for background task patterns + const messages = toMessageArray(typedBody.messages ?? typedBody.input ?? []); + if (!Array.isArray(messages) || messages.length === 0) return null; + + // Find system message + const systemMsg = messages.find( + (message: BackgroundMessage) => message.role === "system" || message.role === "developer" + ); + if (!systemMsg) return null; + + const systemContent = + typeof systemMsg.content === "string" ? systemMsg.content.toLowerCase() : ""; + + if (!systemContent) return null; + + // Check against detection patterns + const matched = _config.detectionPatterns.some((pattern) => + systemContent.includes(pattern.toLowerCase()) + ); + + if (!matched) return null; + + // 4. Additional heuristic: background tasks typically have very few messages + // (system + 1-2 user messages) + const userMessages = messages.filter((message: BackgroundMessage) => message.role === "user"); + if (userMessages.length > 3) return null; // Too many turns for a background task + + return "system_prompt_pattern"; +} + /** * Check if a request is a background/utility task. * @@ -131,44 +212,7 @@ export function isBackgroundTask( body: BackgroundTaskBody | unknown, headers: Record | null = null ): boolean { - if (!body || typeof body !== "object") return false; - const typedBody = body as BackgroundTaskBody; - - // 1. Check explicit header - if (headers) { - const priority = - headers["x-request-priority"] || headers["X-Request-Priority"] || headers["x-initiator"]; - if (priority === "background" || priority === "Background") return true; - } - - // 2. Check system prompt for background task patterns - const messages = toMessageArray(typedBody.messages ?? typedBody.input ?? 
[]); - if (!Array.isArray(messages) || messages.length === 0) return false; - - // Find system message - const systemMsg = messages.find( - (message: BackgroundMessage) => message.role === "system" || message.role === "developer" - ); - if (!systemMsg) return false; - - const systemContent = - typeof systemMsg.content === "string" ? systemMsg.content.toLowerCase() : ""; - - if (!systemContent) return false; - - // Check against detection patterns - const matched = _config.detectionPatterns.some((pattern) => - systemContent.includes(pattern.toLowerCase()) - ); - - if (!matched) return false; - - // 3. Additional heuristic: background tasks typically have very few messages - // (system + 1-2 user messages) - const userMessages = messages.filter((message: BackgroundMessage) => message.role === "user"); - if (userMessages.length > 3) return false; // Too many turns for a background task - - return true; + return getBackgroundTaskReason(body, headers) !== null; } /** diff --git a/open-sse/services/sessionManager.ts b/open-sse/services/sessionManager.ts index 66ee3d09..7dd67758 100644 --- a/open-sse/services/sessionManager.ts +++ b/open-sse/services/sessionManager.ts @@ -197,6 +197,17 @@ export function getActiveSessionCountForKey(apiKeyId: string): number { return activeSessionsByKey.get(apiKeyId)?.size ?? 0; } +/** + * Snapshot of active session counts per API key. + */ +export function getAllActiveSessionCountsByKey(): Record { + const out: Record = {}; + for (const [apiKeyId, sessionIds] of activeSessionsByKey) { + out[apiKeyId] = sessionIds.size; + } + return out; +} + /** * T08: Register a session as belonging to an API key. * Call this after session creation is allowed (i.e., limit check passed). 
diff --git a/open-sse/services/thinkingBudget.ts b/open-sse/services/thinkingBudget.ts index b0b4b170..4028037c 100644 --- a/open-sse/services/thinkingBudget.ts +++ b/open-sse/services/thinkingBudget.ts @@ -29,9 +29,9 @@ export const EFFORT_BUDGETS = { // Used when clients send string-based thinking levels (e.g., VS Code Copilot) export const THINKING_LEVEL_MAP = { none: 0, - low: 1024, - medium: 10240, - high: 131072, + low: 4096, + medium: 8192, + high: 24576, max: 131072, // T11: max = full Claude budget (sub2api: xhigh) xhigh: 131072, // T11: explicit xhigh alias }; @@ -75,7 +75,8 @@ export function normalizeThinkingLevel(body) { // Handle top-level thinkingLevel or thinking_level string fields const levelStr = result.thinkingLevel || result.thinking_level; if (typeof levelStr === "string" && THINKING_LEVEL_MAP[levelStr.toLowerCase()] !== undefined) { - const budget = THINKING_LEVEL_MAP[levelStr.toLowerCase()]; + const rawBudget = THINKING_LEVEL_MAP[levelStr.toLowerCase()]; + const budget = capThinkingBudget(result.model || "", rawBudget); // Convert to Claude thinking format as canonical representation result.thinking = { type: budget > 0 ? "enabled" : "disabled", @@ -93,7 +94,8 @@ export function normalizeThinkingLevel(body) { typeof geminiLevel === "string" && THINKING_LEVEL_MAP[geminiLevel.toLowerCase()] !== undefined ) { - const budget = THINKING_LEVEL_MAP[geminiLevel.toLowerCase()]; + const rawBudget = THINKING_LEVEL_MAP[geminiLevel.toLowerCase()]; + const budget = capThinkingBudget(result.model || "", rawBudget); result.generationConfig = { ...result.generationConfig, thinkingConfig: { ...result.generationConfig.thinkingConfig, thinkingBudget: budget }, diff --git a/open-sse/utils/aiSdkCompat.ts b/open-sse/utils/aiSdkCompat.ts new file mode 100644 index 00000000..ec206f08 --- /dev/null +++ b/open-sse/utils/aiSdkCompat.ts @@ -0,0 +1,31 @@ +/** + * AI SDK compatibility helpers (T26). 
+ */ + +/** + * Detects when a client explicitly prefers JSON (non-SSE) responses. + */ +export function clientWantsJsonResponse(acceptHeader: unknown): boolean { + if (typeof acceptHeader !== "string") return false; + const normalized = acceptHeader.toLowerCase(); + return normalized.includes("application/json") && !normalized.includes("text/event-stream"); +} + +/** + * Resolves stream behavior from request body + Accept header. + * OpenAI-compatible behavior: stream only when `stream: true` and client did not force JSON. + */ +export function resolveStreamFlag(bodyStream: unknown, acceptHeader: unknown): boolean { + return bodyStream === true && !clientWantsJsonResponse(acceptHeader); +} + +/** + * Removes surrounding markdown code fences when Claude wraps JSON payloads. + * Example: ```json\n{"ok":true}\n``` -> {"ok":true} + */ +export function stripMarkdownCodeFence(text: unknown): unknown { + if (typeof text !== "string") return text; + const codeBlockRegex = /^```(?:json|javascript|typescript|js|ts)?\s*\n?([\s\S]*?)\n?```\s*$/i; + const match = text.trim().match(codeBlockRegex); + return match ? match[1].trim() : text; +} diff --git a/open-sse/utils/proxyFetch.ts b/open-sse/utils/proxyFetch.ts index ee1ed0f7..09266a34 100644 --- a/open-sse/utils/proxyFetch.ts +++ b/open-sse/utils/proxyFetch.ts @@ -6,6 +6,7 @@ import { proxyUrlForLogs, } from "./proxyDispatcher.ts"; import tlsClient from "./tlsClient.ts"; +import { isProxyReachable } from "@/lib/proxyHealth"; function isTlsFingerprintEnabled() { return process.env.ENABLE_TLS_FINGERPRINT === "true"; @@ -134,6 +135,22 @@ export async function runWithProxyContext(proxyConfig, fn) { const resolvedProxyUrl = proxyConfig ? proxyConfigToUrl(proxyConfig) : null; + // T14: Proxy Fast-Fail + // Perform a short TCP reachability check before issuing upstream requests. 
+ if (resolvedProxyUrl) { + const reachable = await isProxyReachable(resolvedProxyUrl); + if (!reachable) { + const proxyLabel = proxyUrlForLogs(resolvedProxyUrl); + const err = new Error(`[Proxy Fast-Fail] Proxy unreachable: ${proxyLabel}`) as Error & { + code?: string; + statusCode?: number; + }; + err.code = "PROXY_UNREACHABLE"; + err.statusCode = 503; + throw err; + } + } + return proxyContext.run(proxyConfig || null, async () => { if (resolvedProxyUrl) { console.log( diff --git a/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx b/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx index 75e59468..2155a01e 100644 --- a/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx +++ b/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx @@ -69,6 +69,7 @@ interface ApiKey { noLog?: boolean; autoResolve?: boolean; isActive?: boolean; + maxSessions?: number; accessSchedule?: AccessSchedule | null; createdAt: string; } @@ -109,6 +110,7 @@ export default function ApiManagerPageClient() { const [error, setError] = useState(null); const [isSubmitting, setIsSubmitting] = useState(false); const [usageStats, setUsageStats] = useState>({}); + const [sessionCounts, setSessionCounts] = useState>({}); const { copied, copy } = useCopyToClipboard(); @@ -150,6 +152,7 @@ export default function ApiManagerPageClient() { setKeys(data.keys || []); // Fetch usage stats after keys are loaded fetchUsageStats(data.keys || []); + fetchSessionCounts(data.keys || []); } } catch (error) { console.log("Error fetching keys:", error); @@ -187,6 +190,31 @@ export default function ApiManagerPageClient() { } }; + const fetchSessionCounts = async (apiKeys: ApiKey[]) => { + if (apiKeys.length === 0) { + setSessionCounts({}); + return; + } + try { + const res = await fetch("/api/sessions"); + if (!res.ok) return; + const data = await res.json(); + const byApiKeyRaw = + data && typeof data.byApiKey === "object" && !Array.isArray(data.byApiKey) + ? 
data.byApiKey + : {}; + const normalized: Record = {}; + for (const key of apiKeys) { + const value = byApiKeyRaw[key.id]; + normalized[key.id] = + typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0; + } + setSessionCounts(normalized); + } catch (error) { + console.log("Error fetching session counts:", error); + } + }; + const clearError = useCallback(() => setError(null), []); const handleCreateKey = async () => { @@ -266,6 +294,7 @@ export default function ApiManagerPageClient() { allowedConnections: string[], autoResolve: boolean, isActive: boolean, + maxSessions: number, accessSchedule: AccessSchedule | null ) => { if (!editingKey || !editingKey.id) return; @@ -291,6 +320,10 @@ export default function ApiManagerPageClient() { const validConnections = allowedConnections.filter( (id) => typeof id === "string" && /^[0-9a-f-]{36}$/i.test(id) ); + const normalizedMaxSessions = + typeof maxSessions === "number" && Number.isFinite(maxSessions) + ? Math.max(0, Math.floor(maxSessions)) + : 0; setIsSubmitting(true); clearError(); @@ -305,6 +338,7 @@ export default function ApiManagerPageClient() { noLog, autoResolve, isActive, + maxSessions: normalizedMaxSessions, accessSchedule, }), }); @@ -505,6 +539,9 @@ export default function ApiManagerPageClient() { Array.isArray(key.allowedConnections) && key.allowedConnections.length > 0; const noLogEnabled = key.noLog === true; const keyIsActive = key.isActive !== false; // default true + const maxSessions = typeof key.maxSessions === "number" ? key.maxSessions : 0; + const hasSessionLimit = maxSessions > 0; + const activeSessions = sessionCounts[key.id] || 0; const hasSchedule = key.accessSchedule?.enabled === true; return (
)} + {hasSessionLimit && ( + + group + Sessions: {activeSessions}/{maxSessions} + + )} {!keyIsActive && ( block @@ -778,6 +821,7 @@ const PermissionsModal = memo(function PermissionsModal({ connections: string[], autoResolve: boolean, isActive: boolean, + maxSessions: number, accessSchedule: AccessSchedule | null ) => void; }) { @@ -794,6 +838,9 @@ const PermissionsModal = memo(function PermissionsModal({ const [noLogEnabled, setNoLogEnabled] = useState(apiKey?.noLog === true); const [autoResolveEnabled, setAutoResolveEnabled] = useState(apiKey?.autoResolve === true); const [keyIsActive, setKeyIsActive] = useState(apiKey?.isActive !== false); + const [maxSessions, setMaxSessions] = useState( + typeof apiKey?.maxSessions === "number" && apiKey.maxSessions > 0 ? apiKey.maxSessions : 0 + ); const [scheduleEnabled, setScheduleEnabled] = useState(apiKey?.accessSchedule?.enabled === true); const [scheduleFrom, setScheduleFrom] = useState(apiKey?.accessSchedule?.from ?? "08:00"); const [scheduleUntil, setScheduleUntil] = useState(apiKey?.accessSchedule?.until ?? "18:00"); @@ -905,6 +952,7 @@ const PermissionsModal = memo(function PermissionsModal({ allowAllConnections ? [] : selectedConnections, autoResolveEnabled, keyIsActive, + maxSessions, schedule ); }, [ @@ -916,6 +964,7 @@ const PermissionsModal = memo(function PermissionsModal({ selectedConnections, autoResolveEnabled, keyIsActive, + maxSessions, scheduleEnabled, scheduleFrom, scheduleUntil, @@ -1007,6 +1056,28 @@ const PermissionsModal = memo(function PermissionsModal({
+ {/* Max Sessions Limit (T08) */} +
+
+

Max Active Sessions

+

+ 0 = unlimited. Return 429 when this key exceeds concurrent sticky sessions. +

+
+
+ { + const parsed = Number.parseInt(e.target.value || "0", 10); + setMaxSessions(Number.isFinite(parsed) && parsed > 0 ? parsed : 0); + }} + /> +
+
+ {/* Access Schedule */}
diff --git a/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx b/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx index bafd4852..7af7eebf 100644 --- a/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx +++ b/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx @@ -153,7 +153,7 @@ export default function DefaultToolCard({ }; // Check if this tool supports direct config file write - const supportsDirectSave = ["continue"].includes(toolId); + const supportsDirectSave = ["continue", "opencode"].includes(toolId); const renderApiKeySelector = () => { return ( diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx index 4ea0c27c..f41acf90 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx @@ -165,6 +165,7 @@ export default function ProviderLimitCard({ percentage={percentage} unlimited={unlimited} resetTime={quota.resetAt} + staleAfterReset={quota.staleAfterReset === true} /> ); })} diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx index 8885f9e2..51ff62bc 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx @@ -71,6 +71,7 @@ export default function QuotaProgressBar({ total = 0, unlimited = false, resetTime = null, + staleAfterReset = false, }) { const colors = getColorClasses(percentage); const countdown = formatResetTime(resetTime); @@ -105,12 +106,17 @@ export default function QuotaProgressBar({ {used.toLocaleString()} / {total.toLocaleString()} requests - {countdown !== 
"-" && ( + {staleAfterReset ? ( +
+ + Refreshing... +
+ ) : countdown !== "-" ? (
Reset in {countdown}
- )} + ) : null}
{/* Reset time display */} diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx index eab32f59..47e8dcf7 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx @@ -92,6 +92,7 @@ export default function QuotaTable({ quotas = [] }) { quota.remainingPercentage !== undefined ? Math.round(quota.remainingPercentage) : calculatePercentage(quota.used, quota.total); + const staleAfterReset = quota.staleAfterReset === true; const colors = getColorClasses(remaining); const countdown = formatResetTime(quota.resetAt); @@ -140,7 +141,9 @@ export default function QuotaTable({ quotas = [] }) { {/* Reset Time */} - {countdown !== t("notAvailableSymbol") || resetDisplay ? ( + {staleAfterReset ? ( +
⟳ Refreshing...
+ ) : countdown !== t("notAvailableSymbol") || resetDisplay ? (
{countdown !== t("notAvailableSymbol") && (
diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx index 56421b55..6f535e13 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx @@ -122,6 +122,7 @@ export default function ProviderLimits() { const intervalRef = useRef(null); const countdownRef = useRef(null); const lastFetchTimeRef = useRef({}); + const staleProbeRef = useRef({}); const fetchConnections = useCallback(async () => { try { @@ -137,11 +138,12 @@ export default function ProviderLimits() { } }, []); - const fetchQuota = useCallback(async (connectionId, provider) => { + const fetchQuota = useCallback(async (connectionId, provider, options = {}) => { + const force = options?.force === true; // Debounce: skip if last fetch was < MIN_FETCH_INTERVAL_MS ago const now = Date.now(); const lastFetch = lastFetchTimeRef.current[connectionId] || 0; - if (now - lastFetch < MIN_FETCH_INTERVAL_MS) { + if (!force && now - lastFetch < MIN_FETCH_INTERVAL_MS) { return; // Skip, data is still fresh } lastFetchTimeRef.current[connectionId] = now; @@ -165,6 +167,20 @@ export default function ProviderLimits() { } const data = await response.json(); const parsedQuotas = parseQuotaData(provider, data); + + // T13: If resetAt already passed but provider still returned stale cumulative usage, + // display 0 immediately and trigger a background probe to refresh snapshot. 
+ const hasStaleAfterReset = parsedQuotas.some((q) => q?.staleAfterReset === true); + if (hasStaleAfterReset) { + const lastProbeAt = staleProbeRef.current[connectionId] || 0; + if (Date.now() - lastProbeAt >= MIN_FETCH_INTERVAL_MS) { + staleProbeRef.current[connectionId] = Date.now(); + setTimeout(() => { + fetchQuota(connectionId, provider, { force: true }).catch(() => {}); + }, 5000); + } + } + setQuotaData((prev) => ({ ...prev, [connectionId]: { @@ -571,6 +587,7 @@ export default function ProviderLimits() { const colors = getBarColor(remaining); const cd = formatCountdown(q.resetAt); const shortName = getShortModelName(q.name); + const staleAfterReset = q.staleAfterReset === true; return (
@@ -583,11 +600,15 @@ export default function ProviderLimits() { {/* Countdown */} - {cd && ( + {staleAfterReset ? ( + + ⟳ Refreshing... + + ) : cd ? ( ⏱ {cd} - )} + ) : null} {/* Progress bar */}
diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx index 7adf8c4d..a7fcd114 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx @@ -76,6 +76,40 @@ export function calculatePercentage(used, total) { return Math.round(((total - used) / total) * 100); } +function isPastResetWindow(resetAt) { + if (!resetAt) return false; + const resetTime = + typeof resetAt === "number" ? resetAt : typeof resetAt === "string" ? Date.parse(resetAt) : NaN; + if (!Number.isFinite(resetTime)) return false; + return Date.now() >= resetTime; +} + +function normalizeQuotaEntry(name, quota = {}, extras = {}) { + const usedRaw = Number(quota?.used || 0); + const totalRaw = Number(quota?.total || 0); + const resetAt = quota?.resetAt || null; + const staleAfterReset = isPastResetWindow(resetAt); + const used = staleAfterReset ? 0 : usedRaw; + const total = Number.isFinite(totalRaw) ? totalRaw : 0; + const remainingPercentageRaw = safePercentage(quota?.remainingPercentage); + const remainingPercentage = + staleAfterReset && total > 0 + ? 100 + : remainingPercentageRaw !== undefined + ? remainingPercentageRaw + : undefined; + + return { + name, + used: Number.isFinite(used) ? used : 0, + total, + resetAt, + staleAfterReset, + ...(remainingPercentage !== undefined ? 
{ remainingPercentage } : {}), + ...extras, + }; +} + /** * Parse provider-specific quota structures into normalized array * @param {string} provider - Provider name (github, antigravity, codex, kiro, claude) @@ -95,13 +129,7 @@ export function parseQuotaData(provider, data) { if (quota?.unlimited && (!quota?.total || quota.total <= 0)) { return; } - normalizedQuotas.push({ - name, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - remainingPercentage: safePercentage(quota.remainingPercentage), - }); + normalizedQuotas.push(normalizeQuotaEntry(name, quota)); }); } break; @@ -109,14 +137,11 @@ export function parseQuotaData(provider, data) { case "antigravity": if (data.quotas) { Object.entries(data.quotas).forEach(([modelKey, quota]: [string, any]) => { - normalizedQuotas.push({ - name: quota.displayName || modelKey, - modelKey: modelKey, // Keep modelKey for sorting - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - remainingPercentage: safePercentage(quota.remainingPercentage), - }); + normalizedQuotas.push( + normalizeQuotaEntry(quota.displayName || modelKey, quota, { + modelKey: modelKey, // Keep modelKey for sorting + }) + ); }); } break; @@ -124,12 +149,7 @@ export function parseQuotaData(provider, data) { case "codex": if (data.quotas) { Object.entries(data.quotas).forEach(([quotaType, quota]: [string, any]) => { - normalizedQuotas.push({ - name: quotaType, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - }); + normalizedQuotas.push(normalizeQuotaEntry(quotaType, quota)); }); } break; @@ -137,12 +157,7 @@ export function parseQuotaData(provider, data) { case "kiro": if (data.quotas) { Object.entries(data.quotas).forEach(([quotaType, quota]: [string, any]) => { - normalizedQuotas.push({ - name: quotaType, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - }); + 
normalizedQuotas.push(normalizeQuotaEntry(quotaType, quota)); }); } break; @@ -159,13 +174,7 @@ export function parseQuotaData(provider, data) { }); } else if (data.quotas) { Object.entries(data.quotas).forEach(([name, quota]: [string, any]) => { - normalizedQuotas.push({ - name, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - remainingPercentage: safePercentage(quota.remainingPercentage), - }); + normalizedQuotas.push(normalizeQuotaEntry(name, quota)); }); } break; @@ -174,12 +183,7 @@ export function parseQuotaData(provider, data) { // Generic fallback for unknown providers if (data.quotas) { Object.entries(data.quotas).forEach(([name, quota]: [string, any]) => { - normalizedQuotas.push({ - name, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - }); + normalizedQuotas.push(normalizeQuotaEntry(name, quota)); }); } } @@ -218,11 +222,7 @@ export function normalizePlanTier(plan) { const upper = raw.toUpperCase(); - if ( - upper.includes("PRO+") || - upper.includes("PRO PLUS") || - upper.includes("PROPLUS") - ) { + if (upper.includes("PRO+") || upper.includes("PRO PLUS") || upper.includes("PROPLUS")) { return { key: "plus", label: "Pro+", variant: "secondary", rank: 4, raw }; } diff --git a/src/app/api/cli-tools/guide-settings/[toolId]/route.ts b/src/app/api/cli-tools/guide-settings/[toolId]/route.ts index 2f0fd1fc..2d5e4e7e 100644 --- a/src/app/api/cli-tools/guide-settings/[toolId]/route.ts +++ b/src/app/api/cli-tools/guide-settings/[toolId]/route.ts @@ -3,6 +3,8 @@ import fs from "fs/promises"; import path from "path"; import os from "os"; import { getRuntimePorts } from "@/lib/runtime/ports"; +import { getOpenCodeConfigPath } from "@/shared/services/cliRuntime"; +import { mergeOpenCodeConfig } from "@/shared/services/opencodeConfig"; import { guideSettingsSaveSchema } from "@/shared/validation/schemas"; import { isValidationFailure, validateBody } from "@/shared/validation/helpers"; @@ 
-10,7 +12,7 @@ import { isValidationFailure, validateBody } from "@/shared/validation/helpers"; * POST /api/cli-tools/guide-settings/:toolId * * Save configuration for guide-based tools that have config files. - * Currently supports: continue + * Currently supports: continue, opencode */ export async function POST(request, { params }) { let rawBody; @@ -131,50 +133,39 @@ async function saveContinueConfig({ baseUrl, apiKey, model }) { } /** - * Save OpenCode config to ~/.config/opencode/config.toml (XDG_CONFIG_HOME aware). + * Save OpenCode config to: + * - Linux/macOS: ~/.config/opencode/opencode.json (XDG_CONFIG_HOME aware) + * - Windows: %APPDATA%/opencode/opencode.json + * * (#524) OpenCode was silently failing because this handler was missing. */ async function saveOpenCodeConfig({ baseUrl, apiKey, model }) { - const { apiPort } = getRuntimePorts(); - // Honour $XDG_CONFIG_HOME if set, otherwise use ~/.config per the XDG Base Directory spec - const xdgConfigHome = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".config"); - const configPath = path.join(xdgConfigHome, "opencode", "config.toml"); + const configPath = getOpenCodeConfigPath(); const configDir = path.dirname(configPath); - // Ensure ~/.config/opencode/ exists + // Ensure config directory exists await fs.mkdir(configDir, { recursive: true }); const normalizedBaseUrl = String(baseUrl || "") .trim() .replace(/\/+$/, ""); - // Read existing TOML to preserve any user settings outside our block - let existingContent = ""; + // Read existing JSON to preserve other provider entries + let existingConfig: Record = {}; try { - existingContent = await fs.readFile(configPath, "utf-8"); + const raw = await fs.readFile(configPath, "utf-8"); + existingConfig = JSON.parse(raw); } catch { - // File doesn't exist yet — start fresh + // File doesn't exist or invalid JSON — start fresh } - // Build the OmniRoute TOML block. - // opencode config.toml uses the [provider.X] table format. 
- void apiPort; // available for future port-based detection - const omniBlock = ` -# OmniRoute managed — updated automatically by OmniRoute CLI Tools -[provider.omniroute] -api_key = "${apiKey || "sk_omniroute"}" -base_url = "${normalizedBaseUrl}" -model = "${model}" -`; + const nextConfig = mergeOpenCodeConfig(existingConfig, { + baseUrl: normalizedBaseUrl, + apiKey, + model, + }); - // Remove old OmniRoute-managed block (if any) then append fresh one - const cleanedContent = existingContent - .replace(/\n?# OmniRoute managed[\s\S]*?(?=\n\[|$)/, "") - .trimEnd(); - - const newContent = (cleanedContent ? cleanedContent + "\n" : "") + omniBlock; - - await fs.writeFile(configPath, newContent, "utf-8"); + await fs.writeFile(configPath, JSON.stringify(nextConfig, null, 2), "utf-8"); return NextResponse.json({ success: true, diff --git a/src/app/api/keys/[id]/route.ts b/src/app/api/keys/[id]/route.ts index 499a9f72..e3f3b0b7 100644 --- a/src/app/api/keys/[id]/route.ts +++ b/src/app/api/keys/[id]/route.ts @@ -62,6 +62,7 @@ export async function PATCH(request, { params }) { noLog, autoResolve, isActive, + maxSessions, accessSchedule, } = validation.data; @@ -72,6 +73,7 @@ export async function PATCH(request, { params }) { if (noLog !== undefined) payload.noLog = noLog; if (autoResolve !== undefined) payload.autoResolve = autoResolve; if (isActive !== undefined) payload.isActive = isActive; + if (maxSessions !== undefined) payload.maxSessions = maxSessions; if (accessSchedule !== undefined) payload.accessSchedule = accessSchedule; const updated = await updateApiKeyPermissions(id, payload); @@ -90,6 +92,7 @@ export async function PATCH(request, { params }) { ...(noLog !== undefined && { noLog }), ...(autoResolve !== undefined && { autoResolve }), ...(isActive !== undefined && { isActive }), + ...(maxSessions !== undefined && { maxSessions }), ...(accessSchedule !== undefined && { accessSchedule }), }); } catch (error) { diff --git a/src/app/api/providers/[id]/test/route.ts 
b/src/app/api/providers/[id]/test/route.ts index ece35e28..0f04052e 100644 --- a/src/app/api/providers/[id]/test/route.ts +++ b/src/app/api/providers/[id]/test/route.ts @@ -516,6 +516,7 @@ async function testApiKeyConnection(connection: any) { return { valid: !!result.valid, error, + warning: result.warning || null, diagnosis, }; } @@ -667,6 +668,7 @@ export async function testSingleConnection(connectionId: string, validationModel return { valid: result.valid, error: result.error, + warning: result.warning || null, refreshed: result.refreshed || false, diagnosis, latencyMs, diff --git a/src/app/api/providers/validate/route.ts b/src/app/api/providers/validate/route.ts index 61d82a2f..60917878 100644 --- a/src/app/api/providers/validate/route.ts +++ b/src/app/api/providers/validate/route.ts @@ -63,6 +63,8 @@ export async function POST(request) { return NextResponse.json({ valid: !!result.valid, error: result.valid ? null : result.error || "Invalid API key", + warning: result.warning || null, + method: result.method || null, }); } catch (error) { console.log("Error validating API key:", error); diff --git a/src/app/api/sessions/route.ts b/src/app/api/sessions/route.ts index 16dff15d..cea6a9c6 100644 --- a/src/app/api/sessions/route.ts +++ b/src/app/api/sessions/route.ts @@ -2,13 +2,15 @@ import { NextResponse } from "next/server"; import { getActiveSessions, getActiveSessionCount, + getAllActiveSessionCountsByKey, } from "@omniroute/open-sse/services/sessionManager.ts"; export async function GET() { try { const sessions = getActiveSessions(); const count = getActiveSessionCount(); - return NextResponse.json({ count, sessions }); + const byApiKey = getAllActiveSessionCountsByKey(); + return NextResponse.json({ count, sessions, byApiKey }); } catch (error) { return NextResponse.json({ error: error.message }, { status: 500 }); } diff --git a/src/lib/providers/validation.ts b/src/lib/providers/validation.ts index ae71e4e8..f5ca56e1 100644 --- 
a/src/lib/providers/validation.ts +++ b/src/lib/providers/validation.ts @@ -99,8 +99,10 @@ async function validateOpenAILikeProvider({ return { valid: false, error: `Validation failed: ${modelsRes.status}` }; } + const testModelId = (providerSpecificData as any)?.validationModelId || modelId; + const testBody = { - model: modelId, + model: testModelId, messages: [{ role: "user", content: "test" }], max_tokens: 1, }; @@ -131,7 +133,13 @@ async function validateOpenAILikeProvider({ return { valid: true, error: null }; } -async function validateAnthropicLikeProvider({ apiKey, baseUrl, modelId, headers = {} }: any) { +async function validateAnthropicLikeProvider({ + apiKey, + baseUrl, + modelId, + headers = {}, + providerSpecificData = {}, +}: any) { if (!baseUrl) { return { valid: false, error: "Missing base URL" }; } @@ -149,11 +157,14 @@ async function validateAnthropicLikeProvider({ apiKey, baseUrl, modelId, headers requestHeaders["anthropic-version"] = "2023-06-01"; } + const testModelId = + providerSpecificData?.validationModelId || modelId || "claude-3-5-sonnet-20241022"; + const response = await fetch(baseUrl, { method: "POST", headers: requestHeaders, body: JSON.stringify({ - model: modelId || "claude-3-5-sonnet-20241022", + model: testModelId, max_tokens: 1, messages: [{ role: "user", content: "test" }], }), @@ -352,52 +363,104 @@ async function validateOpenAICompatibleProvider({ apiKey, providerSpecificData = return { valid: false, error: "No base URL configured for OpenAI compatible provider" }; } + const validationModelId = + typeof providerSpecificData?.validationModelId === "string" + ? 
providerSpecificData.validationModelId.trim() + : ""; + // Step 1: Try GET /models + let modelsReachable = false; try { const modelsRes = await fetch(`${baseUrl}/models`, { method: "GET", headers: buildBearerHeaders(apiKey), }); + modelsReachable = true; + if (modelsRes.ok) { - return { valid: true, error: null }; + return { valid: true, error: null, method: "models_endpoint" }; } if (modelsRes.status === 401 || modelsRes.status === 403) { return { valid: false, error: "Invalid API key" }; } + + // Endpoint responded and auth seems valid, but quota is exhausted/rate-limited. + if (modelsRes.status === 429) { + return { + valid: true, + error: null, + method: "models_endpoint", + warning: "Rate limited, but credentials are valid", + }; + } } catch { // /models fetch failed (network error, etc.) — fall through to chat test } + // T25: if /models cannot be used and no custom model was provided, return a + // clear actionable message instead of a generic connection error. + if (!validationModelId) { + return { + valid: false, + error: "Endpoint /models unavailable. Provide a Model ID to validate via /chat/completions.", + }; + } + // Step 2: Fallback — try a minimal chat completion request // Many providers don't expose /models but accept chat completions fine const apiType = providerSpecificData.apiType || "chat"; const chatSuffix = apiType === "responses" ? 
"/responses" : "/chat/completions"; const chatUrl = `${baseUrl}${chatSuffix}`; + const testModelId = validationModelId; try { const chatRes = await fetch(chatUrl, { method: "POST", headers: buildBearerHeaders(apiKey), body: JSON.stringify({ - model: "gpt-4o-mini", + model: testModelId, messages: [{ role: "user", content: "test" }], max_tokens: 1, }), }); if (chatRes.ok) { - return { valid: true, error: null }; + return { valid: true, error: null, method: "chat_completions" }; } if (chatRes.status === 401 || chatRes.status === 403) { return { valid: false, error: "Invalid API key" }; } + if (chatRes.status === 429) { + return { + valid: true, + error: null, + method: "chat_completions", + warning: "Rate limited, but credentials are valid", + }; + } + + // If /models was reachable but returned non-auth error, and chat succeeds + // auth-wise, this still confirms credentials are valid. + if (chatRes.status === 400) { + return { + valid: true, + error: null, + method: "inference_available", + warning: "Model ID may be invalid, but credentials are valid", + }; + } + // 4xx other than auth (e.g. 400 bad model, 422) usually means auth passed if (chatRes.status >= 400 && chatRes.status < 500) { - return { valid: true, error: null }; + return { + valid: true, + error: null, + method: "inference_available", + }; } if (chatRes.status >= 500) { @@ -410,6 +473,10 @@ async function validateOpenAICompatibleProvider({ apiKey, providerSpecificData = // Step 3: Final fallback — simple connectivity check // For local providers (Ollama, LM Studio, etc.) 
that may not respond to // standard OpenAI endpoints but are still reachable + if (!modelsReachable) { + return { valid: false, error: "Connection failed while testing /chat/completions" }; + } + try { const pingRes = await fetch(baseUrl, { method: "GET", @@ -464,12 +531,13 @@ async function validateAnthropicCompatibleProvider({ apiKey, providerSpecificDat } // Step 2: Fallback — try a minimal messages request + const testModelId = providerSpecificData?.validationModelId || "claude-3-5-sonnet-20241022"; try { const messagesRes = await fetch(`${baseUrl}/messages`, { method: "POST", headers, body: JSON.stringify({ - model: "claude-3-5-sonnet-20241022", + model: testModelId, max_tokens: 1, messages: [{ role: "user", content: "test" }], }), @@ -646,6 +714,7 @@ export async function validateProviderApiKey({ provider, apiKey, providerSpecifi baseUrl: requestBaseUrl, modelId, headers: requestHeaders, + providerSpecificData, }); } diff --git a/src/lib/usage/callLogs.ts b/src/lib/usage/callLogs.ts index 341dc753..2a18f3c7 100644 --- a/src/lib/usage/callLogs.ts +++ b/src/lib/usage/callLogs.ts @@ -330,7 +330,7 @@ export async function getCallLogs(filter: any = {}) { } if (filter.model) { - conditions.push("model LIKE @modelQ"); + conditions.push("(model LIKE @modelQ OR requested_model LIKE @modelQ)"); params.modelQ = `%${filter.model}%`; } if (filter.provider) { @@ -351,7 +351,8 @@ export async function getCallLogs(filter: any = {}) { if (filter.search) { conditions.push(`( model LIKE @searchQ OR path LIKE @searchQ OR account LIKE @searchQ OR - provider LIKE @searchQ OR api_key_name LIKE @searchQ OR api_key_id LIKE @searchQ OR + requested_model LIKE @searchQ OR provider LIKE @searchQ OR + api_key_name LIKE @searchQ OR api_key_id LIKE @searchQ OR combo_name LIKE @searchQ OR CAST(status AS TEXT) LIKE @searchQ )`); params.searchQ = `%${filter.search}%`; @@ -408,6 +409,7 @@ export async function getCallLogById(id: string) { path: toStringOrNull(entryRow.path), status: 
toNumber(entryRow.status), model: toStringOrNull(entryRow.model), + requestedModel: toStringOrNull(entryRow.requested_model), provider: toStringOrNull(entryRow.provider), account: toStringOrNull(entryRow.account), connectionId: toStringOrNull(entryRow.connection_id), diff --git a/src/shared/components/RequestLoggerV2.tsx b/src/shared/components/RequestLoggerV2.tsx index 91ed2d19..cd34ebf4 100644 --- a/src/shared/components/RequestLoggerV2.tsx +++ b/src/shared/components/RequestLoggerV2.tsx @@ -29,6 +29,7 @@ const STATUS_FILTERS = [ const COLUMNS = [ { key: "status", label: "Status" }, { key: "model", label: "Model" }, + { key: "requestedModel", label: "Requested" }, { key: "provider", label: "Provider" }, { key: "protocol", label: "Protocol" }, { key: "account", label: "Account" }, @@ -234,7 +235,9 @@ export default function RequestLoggerV2() { // Unique accounts and providers for dropdowns const uniqueAccounts = [...new Set(logs.map((l) => l.account).filter((a) => a && a !== "-"))]; - const uniqueModels = [...new Set(logs.map((l) => l.model).filter(Boolean))].sort(); + const uniqueModels = [ + ...new Set(logs.flatMap((l) => [l.model, l.requestedModel]).filter((value) => Boolean(value))), + ].sort(); const uniqueProviders = [ ...new Set(logs.map((l) => l.provider).filter((p) => p && p !== "-")), ].sort(); @@ -514,6 +517,11 @@ export default function RequestLoggerV2() { Model )} + {visibleColumns.requestedModel && ( + + Requested + + )} {visibleColumns.provider && ( Provider @@ -596,6 +604,28 @@ export default function RequestLoggerV2() { {log.model} )} + {visibleColumns.requestedModel && ( + + {log.requestedModel ? ( + + {log.requestedModel} + + ) : ( + + )} + + )} {visibleColumns.provider && ( {/* Summary Cards — Row 1: Core metrics */} -
+
+
{/* Summary Cards — Row 2: Derived insights */} -
+
+
{/* Activity Heatmap + Weekly Widgets */} diff --git a/src/shared/constants/cliTools.ts b/src/shared/constants/cliTools.ts index 026b5692..aa66c95b 100644 --- a/src/shared/constants/cliTools.ts +++ b/src/shared/constants/cliTools.ts @@ -180,12 +180,46 @@ export const CLI_TOOLS = { color: "#FF6B35", description: "OpenCode AI coding agent (Terminal)", configType: "guide", + notes: [ + { + type: "warning", + text: "Config path: Linux/macOS ~/.config/opencode/opencode.json • Windows %APPDATA%\\\\opencode\\\\opencode.json", + }, + { + type: "warning", + text: 'Thinking variant example: opencode run "implement this feature" --model omniroute/claude-sonnet-4-5-thinking --variant high', + }, + ], guideSteps: [ { step: 1, title: "Install OpenCode", desc: "Install via npm: npm install -g opencode-ai" }, { step: 2, title: "API Key", type: "apiKeySelector" }, { step: 3, title: "Set Base URL", desc: "opencode config set baseUrl {{baseUrl}}" }, { step: 4, title: "Select Model", type: "modelSelector" }, + { + step: 5, + title: "Use Thinking Variant", + desc: "For thinking models, run with --variant high/low/max (example command below).", + }, ], + codeBlock: { + language: "json", + code: `{ + "providers": { + "omniroute": { + "name": "OmniRoute", + "api": "openai", + "baseURL": "{{baseUrl}}", + "apiKey": "{{apiKey}}", + "models": [ + "{{model}}", + "claude-sonnet-4-5-thinking", + "gemini-3.1-pro-high", + "gemini-3-flash" + ] + } + } +}`, + }, }, kiro: { id: "kiro", diff --git a/src/shared/constants/modelSpecs.ts b/src/shared/constants/modelSpecs.ts new file mode 100644 index 00000000..ede662c7 --- /dev/null +++ b/src/shared/constants/modelSpecs.ts @@ -0,0 +1,111 @@ +/** + * Centralized specifications for AI Models. + * Contains maximum token caps and thinking budgets to prevent API errors + * when clients request more than the model supports. 
+ */ + +export interface ModelSpec { + maxOutputTokens: number; + contextWindow?: number; + defaultThinkingBudget?: number; + thinkingBudgetCap?: number; + thinkingOverhead?: number; // buffer de tokens para thinking + adaptiveMaxTokens?: number; // tokens disponíveis para output quando thinking ativo + aliases?: string[]; // IDs alternativos para este modelo + supportsThinking?: boolean; + supportsTools?: boolean; + supportsVision?: boolean; +} + +export const MODEL_SPECS: Record = { + // ── Gemini 3 Flash series ─────────────────────────────────────── + "gemini-3-flash": { + maxOutputTokens: 65536, + contextWindow: 1048576, + defaultThinkingBudget: 0, + thinkingBudgetCap: 0, + supportsThinking: false, + supportsTools: true, + supportsVision: true, + aliases: ["gemini-3-flash-preview", "gemini-3.1-flash-lite-preview"], + }, + + // ── Gemini 3.1 Pro High ───────────────────────────────────────── + "gemini-3.1-pro-high": { + maxOutputTokens: 131072, + contextWindow: 1048576, + defaultThinkingBudget: 24576, + thinkingBudgetCap: 32768, + thinkingOverhead: 1000, + supportsThinking: true, + supportsTools: true, + supportsVision: true, + aliases: ["gemini-3-pro-high"], + }, + + // ── Gemini 3.1 Pro Low ────────────────────────────────────────── + "gemini-3.1-pro-low": { + maxOutputTokens: 131072, + contextWindow: 1048576, + defaultThinkingBudget: 8192, + thinkingBudgetCap: 16000, + supportsThinking: true, + supportsTools: true, + supportsVision: true, + aliases: ["gemini-3-pro-low"], + }, + + // ── Claude Opus 4.5 ───────────────────────────────────────────── + "claude-opus-4-5": { + maxOutputTokens: 32768, + contextWindow: 200000, + defaultThinkingBudget: 10000, + thinkingBudgetCap: 32000, + supportsThinking: true, + supportsTools: true, + supportsVision: true, + }, + + // Defaults + __default__: { + maxOutputTokens: 8192, + }, +}; + +export function getModelSpec(modelId: string): ModelSpec | undefined { + if (MODEL_SPECS[modelId]) return MODEL_SPECS[modelId]; + + // 
Buscas por alias + for (const [canonical, spec] of Object.entries(MODEL_SPECS)) { + if (spec.aliases?.includes(modelId)) return spec; + } + + // Prefix matching + for (const [key, spec] of Object.entries(MODEL_SPECS)) { + if (key !== "__default__" && modelId.startsWith(key)) return spec; + } + + return undefined; +} + +export function capMaxOutputTokens(modelId: string, requested?: number): number { + const spec = getModelSpec(modelId); + const cap = spec?.maxOutputTokens ?? MODEL_SPECS.__default__.maxOutputTokens; + return requested ? Math.min(requested, cap) : cap; +} + +export function getDefaultThinkingBudget(modelId: string): number { + return getModelSpec(modelId)?.defaultThinkingBudget ?? 0; +} + +export function capThinkingBudget(modelId: string, budget: number): number { + const cap = getModelSpec(modelId)?.thinkingBudgetCap ?? budget; + return Math.min(budget, cap); +} + +export function resolveModelAlias(modelId: string): string { + for (const [canonical, spec] of Object.entries(MODEL_SPECS)) { + if (spec.aliases?.includes(modelId)) return canonical; + } + return modelId; +} diff --git a/src/shared/constants/pricing.ts b/src/shared/constants/pricing.ts index 399de81d..aa7af829 100644 --- a/src/shared/constants/pricing.ts +++ b/src/shared/constants/pricing.ts @@ -102,6 +102,21 @@ export const DEFAULT_PRICING = { reasoning: 30.0, cache_creation: 5.0, }, + // T12: fallback pricing for gpt-5.4 mini variants + "gpt-5.4-mini": { + input: 1.5, + output: 6.0, + cached: 0.75, + reasoning: 9.0, + cache_creation: 1.5, + }, + "gpt5.4-mini": { + input: 1.5, + output: 6.0, + cached: 0.75, + reasoning: 9.0, + cache_creation: 1.5, + }, // GPT 5.3 Codex family (all same pricing tier) "gpt-5.3-codex": GPT_5_3_CODEX_PRICING, "gpt-5.3-codex-xhigh": GPT_5_3_CODEX_PRICING, @@ -183,6 +198,13 @@ export const DEFAULT_PRICING = { reasoning: 4.5, cache_creation: 0.5, }, + "gemini-3.1-flash-lite-preview": { + input: 0.5, + output: 3.0, + cached: 0.03, + reasoning: 4.5, + 
cache_creation: 0.5, + }, "gemini-3-pro-preview": { input: 2.0, output: 12.0, @@ -197,6 +219,20 @@ export const DEFAULT_PRICING = { reasoning: 18.0, cache_creation: 2.0, }, + "gemini-3-flash-preview": { + input: 0.5, + output: 3.0, + cached: 0.03, + reasoning: 4.5, + cache_creation: 0.5, + }, + "gemini-3.1-flash-lite-preview": { + input: 0.5, + output: 3.0, + cached: 0.03, + reasoning: 4.5, + cache_creation: 0.5, + }, "gemini-2.5-pro": { input: 2.0, output: 12.0, @@ -707,11 +743,11 @@ export const DEFAULT_PRICING = { // GLM glm: { "glm-5": { - input: 1.0, - output: 3.2, - cached: 0.5, - reasoning: 4.8, - cache_creation: 1.0, + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, }, "glm-5-turbo": { input: 1.2, @@ -721,11 +757,11 @@ export const DEFAULT_PRICING = { cache_creation: 1.2, }, "glm-4.7": { - input: 0.75, - output: 3.0, - cached: 0.375, - reasoning: 4.5, - cache_creation: 0.75, + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, }, "glm-4.6": { input: 0.5, @@ -761,6 +797,20 @@ export const DEFAULT_PRICING = { reasoning: 4.5, cache_creation: 0.6, }, + "kimi-k2.5-thinking": { + input: 0.6, + output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, + "kimi-for-coding": { + input: 0.6, + output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, "moonshot-kimi-k2.5": { input: 0.6, output: 3.0, @@ -770,6 +820,30 @@ export const DEFAULT_PRICING = { }, }, + // Kimi Coding aliases (OAuth/API key) + kmc: { + "kimi-k2.5": { input: 0.6, output: 3.0, cached: 0.3, reasoning: 4.5, cache_creation: 0.6 }, + "kimi-k2.5-thinking": { + input: 0.6, + output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, + "kimi-latest": { input: 1.0, output: 4.0, cached: 0.5, reasoning: 6.0, cache_creation: 1.0 }, + }, + kmca: { + "kimi-k2.5": { input: 0.6, output: 3.0, cached: 0.3, reasoning: 4.5, cache_creation: 0.6 }, + "kimi-k2.5-thinking": { + input: 0.6, + 
output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, + "kimi-latest": { input: 1.0, output: 4.0, cached: 0.5, reasoning: 6.0, cache_creation: 1.0 }, + }, + // MiniMax minimax: { "minimax-m2.1": { @@ -789,18 +863,18 @@ export const DEFAULT_PRICING = { // MiniMax M2.5 — mais barato que M2.1, reasoning + tools // Context: 204.800 tokens | Max Output: 16.384 tokens "minimax-m2.5": { - input: 0.3, - output: 1.2, - cached: 0.15, - reasoning: 1.8, - cache_creation: 0.3, + input: 0.27, + output: 0.95, + cached: 0.135, + reasoning: 1.425, + cache_creation: 0.27, }, "MiniMax-M2.5": { - input: 0.3, - output: 1.2, - cached: 0.15, - reasoning: 1.8, - cache_creation: 0.3, + input: 0.27, + output: 0.95, + cached: 0.135, + reasoning: 1.425, + cache_creation: 0.27, }, // T12: MiniMax M2.7 — new default model (sub2api PR #1120) // Upgraded from M2.5, same API endpoint api.minimax.io @@ -1107,11 +1181,11 @@ export const DEFAULT_PRICING = { // ───────────────────────────────────────────────────────────────────── zai: { "glm-5": { - input: 1.0, - output: 3.2, - cached: 0.5, - reasoning: 4.8, - cache_creation: 1.0, + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, }, "glm-5-turbo": { input: 1.2, @@ -1120,6 +1194,13 @@ export const DEFAULT_PRICING = { reasoning: 6.0, cache_creation: 1.2, }, + "glm-4.7": { + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, + }, }, kiro: { diff --git a/src/shared/services/cliRuntime.ts b/src/shared/services/cliRuntime.ts index bbfd706c..b8ef8a90 100644 --- a/src/shared/services/cliRuntime.ts +++ b/src/shared/services/cliRuntime.ts @@ -98,7 +98,7 @@ const CLI_TOOLS: Record = { // opencode takes several seconds on cold start environments healthcheckTimeoutMs: 15000, paths: { - config: ".config/opencode/config.toml", + config: ".config/opencode/opencode.json", }, }, }; @@ -337,9 +337,39 @@ export const ensureCliConfigWriteAllowed = () => { export const 
getCliConfigHome = () => String(process.env.CLI_CONFIG_HOME || "").trim() || os.homedir(); +export const resolveOpencodeConfigDir = ( + platform = process.platform, + env: NodeJS.ProcessEnv = process.env, + homeDir = os.homedir() +) => { + const isWin = platform === "win32"; + if (isWin) { + const appData = String(env.APPDATA || "").trim(); + return appData || path.join(homeDir, "AppData", "Roaming"); + } + + const xdgConfigHome = String(env.XDG_CONFIG_HOME || "").trim(); + return xdgConfigHome || path.join(homeDir, ".config"); +}; + +export const resolveOpencodeConfigPath = ( + platform = process.platform, + env: NodeJS.ProcessEnv = process.env, + homeDir = os.homedir() +) => path.join(resolveOpencodeConfigDir(platform, env, homeDir), "opencode", "opencode.json"); + +export const getOpenCodeConfigPath = () => resolveOpencodeConfigPath(); + export const getCliConfigPaths = (toolId: string) => { const tool = CLI_TOOLS[toolId]; if (!tool) return null; + + if (toolId === "opencode") { + return { + config: getOpenCodeConfigPath(), + }; + } + const home = getCliConfigHome(); return Object.fromEntries( Object.entries(tool.paths).map(([key, relativePath]) => [ diff --git a/src/shared/services/opencodeConfig.ts b/src/shared/services/opencodeConfig.ts new file mode 100644 index 00000000..da4305ea --- /dev/null +++ b/src/shared/services/opencodeConfig.ts @@ -0,0 +1,64 @@ +type OpenCodeConfigInput = { + baseUrl?: string; + apiKey?: string; + model?: string; +}; + +type OpenCodeProviderConfig = { + name: string; + api: "openai"; + baseURL: string; + apiKey: string; + models: string[]; +}; + +const OPENCODE_DEFAULT_MODELS = [ + "claude-opus-4-5-thinking", + "claude-sonnet-4-5-thinking", + "gemini-3.1-pro-high", + "gemini-3-flash", +] as const; + +const normalizeValue = (value: unknown) => + String(value || "") + .trim() + .replace(/^\/+/, ""); + +export const buildOpenCodeProviderConfig = ({ + baseUrl, + apiKey, + model, +}: OpenCodeConfigInput): OpenCodeProviderConfig => { + 
const normalizedBaseUrl = String(baseUrl || "") + .trim() + .replace(/\/+$/, ""); + const normalizedModel = normalizeValue(model); + + const uniqueModels = [...new Set([normalizedModel, ...OPENCODE_DEFAULT_MODELS].filter(Boolean))]; + + return { + name: "OmniRoute", + api: "openai", + baseURL: normalizedBaseUrl, + apiKey: apiKey || "sk_omniroute", + models: uniqueModels, + }; +}; + +export const mergeOpenCodeConfig = ( + existingConfig: Record<string, unknown> | null | undefined, + input: OpenCodeConfigInput +) => { + const safeConfig = + existingConfig && typeof existingConfig === "object" && !Array.isArray(existingConfig) + ? existingConfig + : {}; + + return { + ...safeConfig, + providers: { + ...((safeConfig as any).providers || {}), + omniroute: buildOpenCodeProviderConfig(input), + }, + }; +}; diff --git a/src/shared/utils/apiKeyPolicy.ts b/src/shared/utils/apiKeyPolicy.ts index a261f1a7..211a074e 100644 --- a/src/shared/utils/apiKeyPolicy.ts +++ b/src/shared/utils/apiKeyPolicy.ts @@ -37,6 +37,7 @@ export interface ApiKeyMetadata { accessSchedule?: AccessSchedule | null; maxRequestsPerDay?: number | null; maxRequestsPerMinute?: number | null; + maxSessions?: number | null; } /** diff --git a/src/shared/validation/schemas.ts b/src/shared/validation/schemas.ts index 70d87421..a6a6d3a1 100644 --- a/src/shared/validation/schemas.ts +++ b/src/shared/validation/schemas.ts @@ -905,6 +905,7 @@ export const updateKeyPermissionsSchema = z noLog: z.boolean().optional(), autoResolve: z.boolean().optional(), isActive: z.boolean().optional(), + maxSessions: z.number().int().min(0).max(10000).optional(), accessSchedule: z.union([accessScheduleSchema, z.null()]).optional(), }) .superRefine((value, ctx) => { @@ -915,6 +916,7 @@ export const updateKeyPermissionsSchema = z value.noLog === undefined && value.autoResolve === undefined && value.isActive === undefined && + value.maxSessions === undefined && value.accessSchedule === undefined ) { ctx.addIssue({ @@ -1028,6 +1030,7 @@ export const 
providersBatchTestSchema = z export const validateProviderApiKeySchema = z.object({ provider: z.string().trim().min(1, "Provider and API key required"), apiKey: z.string().trim().min(1, "Provider and API key required"), + validationModelId: z.string().trim().optional(), }); const geminiPartSchema = z diff --git a/src/sse/handlers/chat.ts b/src/sse/handlers/chat.ts index e7339d7c..4ffdf46b 100644 --- a/src/sse/handlers/chat.ts +++ b/src/sse/handlers/chat.ts @@ -46,6 +46,14 @@ import { applyTaskAwareRouting, getTaskRoutingConfig, } from "@omniroute/open-sse/services/taskAwareRouter.ts"; +import { + generateSessionId as generateStableSessionId, + touchSession, + extractExternalSessionId, + checkSessionLimit, + registerKeySession, + isSessionRegisteredForKey, +} from "@omniroute/open-sse/services/sessionManager.ts"; import { isFallbackDecision, shouldUseFallback, @@ -161,6 +169,13 @@ export async function handleChat(request: any, clientRawRequest: any = null) { return errorResponse(HTTP_STATUS.BAD_REQUEST, "Missing model"); } + // T04: client-provided external session header has priority over generated fingerprint. + const externalSessionId = extractExternalSessionId(request.headers); + const sessionId = externalSessionId || generateStableSessionId(body); + if (sessionId) { + touchSession(sessionId); + } + // Pipeline: API key policy enforcement (model restrictions + budget limits) telemetry.startPhase("policy"); const policy = await enforceApiKeyPolicy(request, modelStr); @@ -174,6 +189,25 @@ export async function handleChat(request: any, clientRawRequest: any = null) { const apiKeyInfo = policy.apiKeyInfo; telemetry.endPhase(); + // T08: per-key active session limit (0 = unlimited). + if (apiKeyInfo?.id && sessionId) { + const maxSessions = + typeof apiKeyInfo.maxSessions === "number" && apiKeyInfo.maxSessions > 0 + ? 
apiKeyInfo.maxSessions + : 0; + + if (maxSessions > 0 && !isSessionRegisteredForKey(apiKeyInfo.id, sessionId)) { + const sessionViolation = checkSessionLimit(apiKeyInfo.id, maxSessions); + if (sessionViolation) { + return withSessionHeader( + errorResponse(HTTP_STATUS.RATE_LIMITED, sessionViolation.message), + sessionId + ); + } + registerKeySession(apiKeyInfo.id, sessionId); + } + } + // T05 — Task-Aware Smart Routing // Detect the semantic task type and optionally route to the optimal model let resolvedModelStr = modelStr; @@ -221,7 +255,8 @@ export async function handleChat(request: any, clientRawRequest: any = null) { const creds = await getProviderCredentials( provider, null, - apiKeyInfo?.allowedConnections ?? null + apiKeyInfo?.allowedConnections ?? null, + modelInfo.model || modelString ); if (!creds || creds.allRateLimited) return false; return true; @@ -238,7 +273,9 @@ export async function handleChat(request: any, clientRawRequest: any = null) { body, combo, handleSingleModel: (b: any, m: string) => - handleSingleModelChat(b, m, clientRawRequest, request, combo.name, apiKeyInfo, telemetry), + handleSingleModelChat(b, m, clientRawRequest, request, combo.name, apiKeyInfo, telemetry, { + sessionId, + }), isModelAvailable: checkModelAvailable, log, settings, @@ -247,7 +284,7 @@ export async function handleChat(request: any, clientRawRequest: any = null) { // Record telemetry recordTelemetry(telemetry); - return response; + return withSessionHeader(response, sessionId); } telemetry.endPhase(); @@ -259,10 +296,11 @@ export async function handleChat(request: any, clientRawRequest: any = null) { request, null, apiKeyInfo, - telemetry + telemetry, + { sessionId } ); recordTelemetry(telemetry); - return response; + return withSessionHeader(response, sessionId); } /** @@ -280,7 +318,7 @@ async function handleSingleModelChat( comboName: string | null = null, apiKeyInfo: any = null, telemetry: any = null, - runtimeOptions: { emergencyFallbackTried?: boolean } = {} + 
runtimeOptions: { emergencyFallbackTried?: boolean; sessionId?: string | null } = {} ) { // 1. Resolve model → provider/model const resolved = await resolveModelOrError(modelStr, body); @@ -310,7 +348,8 @@ async function handleSingleModelChat( const credentials = await getProviderCredentials( provider, excludeConnectionId, - apiKeyInfo?.allowedConnections ?? null + apiKeyInfo?.allowedConnections ?? null, + model ); if (!credentials || credentials.allRateLimited) { @@ -333,6 +372,9 @@ async function handleSingleModelChat( const accountId = credentials.connectionId.slice(0, 8); log.info("AUTH", `Using ${provider} account: ${accountId}...`); + if (runtimeOptions.sessionId) { + touchSession(runtimeOptions.sessionId, credentials.connectionId); + } const refreshedCredentials = await checkAndRefreshToken(provider, credentials); const proxyInfo = await safeResolveProxy(credentials.connectionId); @@ -604,6 +646,23 @@ async function executeChatWithBreaker({ tlsFingerprintUsed: false, }; } + + // T14: Proxy Fast-Fail should be converted into an upstream-unavailable result + // so account fallback logic can continue with another connection. 
+ if (cbErr?.code === "PROXY_UNREACHABLE" || /proxy unreachable/i.test(cbErr?.message || "")) { + const detail = cbErr?.message || "Proxy unreachable"; + log.warn("PROXY", detail); + return { + result: { + success: false, + response: (unavailableResponse as any)(HTTP_STATUS.SERVICE_UNAVAILABLE, detail, 2), + status: HTTP_STATUS.SERVICE_UNAVAILABLE, + error: detail, + }, + tlsFingerprintUsed: false, + }; + } + throw cbErr; } } @@ -710,3 +769,20 @@ function safeLogEvents({ }); } catch {} } + +function withSessionHeader(response: Response, sessionId: string | null): Response { + if (!response || !sessionId) return response; + + try { + response.headers.set("X-OmniRoute-Session-Id", sessionId); + return response; + } catch { + const cloned = new Response(response.body, { + status: response.status, + statusText: response.statusText, + headers: response.headers, + }); + cloned.headers.set("X-OmniRoute-Session-Id", sessionId); + return cloned; + } +} diff --git a/src/sse/services/auth.ts b/src/sse/services/auth.ts index 97ca748d..757bff7d 100644 --- a/src/sse/services/auth.ts +++ b/src/sse/services/auth.ts @@ -16,6 +16,7 @@ import { } from "@omniroute/open-sse/services/accountFallback.ts"; import { isLocalProvider } from "@omniroute/open-sse/config/providerRegistry.ts"; import { COOLDOWN_MS } from "@omniroute/open-sse/config/constants.ts"; +import { getCodexModelScope } from "@omniroute/open-sse/executors/codex.ts"; import * as log from "../utils/logger"; import { fisherYatesShuffle, getNextFromDeckSync } from "@/shared/utils/shuffleDeck"; @@ -166,6 +167,56 @@ function applyCodexWindowPolicy(rawWindows: string[], providerSpecificData: Json return uniqueWindows(windows); } +function getCodexScopeRateLimitedUntil( + providerSpecificData: JsonRecord, + model: string | null +): string | null { + if (!model) return null; + const scope = getCodexModelScope(model); + const scopeMap = asRecord(providerSpecificData.codexScopeRateLimitedUntil); + const value = scopeMap[scope]; + 
return typeof value === "string" && value.trim().length > 0 ? value : null; +} + +function isCodexScopeUnavailable( + connection: ProviderConnectionView, + model: string | null +): boolean { + const until = getCodexScopeRateLimitedUntil(connection.providerSpecificData, model); + if (!until) return false; + return new Date(until).getTime() > Date.now(); +} + +function getEarliestCodexScopeRateLimitedUntil( + connections: ProviderConnectionView[], + model: string | null +): string | null { + let earliest: string | null = null; + let earliestMs = Infinity; + + for (const conn of connections) { + const until = getCodexScopeRateLimitedUntil(conn.providerSpecificData, model); + if (!until) continue; + const ms = new Date(until).getTime(); + if (!Number.isFinite(ms) || ms <= Date.now()) continue; + if (ms < earliestMs) { + earliest = until; + earliestMs = ms; + } + } + + return earliest; +} + +function normalizeStatus(value: string | null): string { + return (value || "").trim().toLowerCase(); +} + +function isTerminalConnectionStatus(connection: ProviderConnectionView): boolean { + const status = normalizeStatus(connection.testStatus); + return status === "credits_exhausted" || status === "banned" || status === "expired"; +} + export function resolveQuotaLimitPolicy( provider: string, providerSpecificData: JsonRecord @@ -259,7 +310,8 @@ export { fisherYatesShuffle, getNextFromDeckSync as getNextFromDeck }; export async function getProviderCredentials( provider: string, excludeConnectionId: string | null = null, - allowedConnections: string[] | null = null + allowedConnections: string[] | null = null, + requestedModel: string | null = null ) { // Acquire mutex to prevent race conditions const currentMutex = selectionMutex; @@ -320,6 +372,8 @@ export async function getProviderCredentials( const availableConnections = connections.filter((c) => { if (excludeConnectionId && c.id === excludeConnectionId) return false; if (isAccountUnavailable(c.rateLimitedUntil)) return false; 
+ if (isTerminalConnectionStatus(c)) return false; + if (provider === "codex" && isCodexScopeUnavailable(c, requestedModel)) return false; return true; }); @@ -330,16 +384,27 @@ export async function getProviderCredentials( connections.forEach((c) => { const excluded = excludeConnectionId && c.id === excludeConnectionId; const rateLimited = isAccountUnavailable(c.rateLimitedUntil); + const terminalStatus = isTerminalConnectionStatus(c); + const codexScopeLimited = provider === "codex" && isCodexScopeUnavailable(c, requestedModel); if (excluded || rateLimited) { log.debug( "AUTH", ` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}` ); + } else if (terminalStatus) { + log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | skipped terminal status=${c.testStatus}`); + } else if (codexScopeLimited) { + const scopeUntil = getCodexScopeRateLimitedUntil(c.providerSpecificData, requestedModel); + log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | codex scope-limited until ${scopeUntil}`); } }); if (availableConnections.length === 0) { - const earliest = getEarliestRateLimitedUntil(connections); + const earliest = + getEarliestRateLimitedUntil(connections) || + (provider === "codex" + ? getEarliestCodexScopeRateLimitedUntil(connections, requestedModel) + : null); if (earliest) { // Find the connection with the earliest rateLimitedUntil to get its error info const rateLimitedConns = connections.filter( @@ -618,6 +683,15 @@ export async function markAccountUnavailable( const conn = connections.find((connection) => connection.id === connectionId); const backoffLevel = conn?.backoffLevel || 0; + // T06/T10/T36: terminal statuses should not be overwritten by transient cooldown state. 
+ if (conn && isTerminalConnectionStatus(conn)) { + log.info( + "AUTH", + `${connectionId.slice(0, 8)} terminal status=${conn.testStatus}, skipping cooldown overwrite` + ); + return { shouldFallback: true, cooldownMs: 0 }; + } + // ─── Anti-Thundering Herd Guard ───────────────────────────────── // If this connection was ALREADY marked unavailable by a prior concurrent // request (within the mutex window), skip re-marking to avoid resetting @@ -633,6 +707,24 @@ export async function markAccountUnavailable( }; } + // T09: Codex scope-aware lockout guard (codex vs spark independent pools). + if (provider === "codex" && model) { + const scopeRateLimitedUntil = getCodexScopeRateLimitedUntil( + conn?.providerSpecificData || {}, + model + ); + if (scopeRateLimitedUntil && new Date(scopeRateLimitedUntil).getTime() > Date.now()) { + log.info( + "AUTH", + `${connectionId.slice(0, 8)} already scope-limited for ${getCodexModelScope(model)} (until ${scopeRateLimitedUntil}), skipping duplicate mark` + ); + return { + shouldFallback: true, + cooldownMs: new Date(scopeRateLimitedUntil).getTime() - Date.now(), + }; + } + } + const { shouldFallback, cooldownMs, newBackoffLevel, reason } = checkFallbackError( status, errorText, @@ -662,6 +754,40 @@ export async function markAccountUnavailable( const rateLimitedUntil = getUnavailableUntil(cooldownMs); const errorMsg = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error"; + // T09: Codex per-scope lockout (do not block the whole account globally). 
+ if (provider === "codex" && status === 429 && model && conn) { + const scope = getCodexModelScope(model); + const existingScopeMap = asRecord(conn.providerSpecificData.codexScopeRateLimitedUntil); + const persistedScopeUntil = getCodexScopeRateLimitedUntil(conn.providerSpecificData, model); + const scopeRateLimitedUntil = persistedScopeUntil || rateLimitedUntil; + const scopeCooldownMs = Math.max(new Date(scopeRateLimitedUntil).getTime() - Date.now(), 0); + + await updateProviderConnection(connectionId, { + testStatus: "unavailable", + lastError: errorMsg, + errorCode: status, + lastErrorAt: new Date().toISOString(), + backoffLevel: newBackoffLevel ?? backoffLevel, + providerSpecificData: { + ...conn.providerSpecificData, + codexScopeRateLimitedUntil: { + ...existingScopeMap, + [scope]: scopeRateLimitedUntil, + }, + }, + }); + + if (scopeCooldownMs > 0) { + lockModel(provider, connectionId, model, reason || "unknown", scopeCooldownMs); + } + + if (status && errorMsg) { + console.error(`❌ ${provider} [${status}] (${scope}): ${errorMsg}`); + } + + return { shouldFallback: true, cooldownMs: scopeCooldownMs }; + } + await updateProviderConnection(connectionId, { rateLimitedUntil, testStatus: "unavailable", diff --git a/tests/unit/background-task-detector.test.mjs b/tests/unit/background-task-detector.test.mjs index b483b155..bfaa9731 100644 --- a/tests/unit/background-task-detector.test.mjs +++ b/tests/unit/background-task-detector.test.mjs @@ -3,6 +3,7 @@ import assert from "node:assert/strict"; const { isBackgroundTask, + getBackgroundTaskReason, getDegradedModel, setBackgroundDegradationConfig, getBackgroundDegradationConfig, @@ -68,6 +69,26 @@ test("isBackgroundTask: detects X-Request-Priority header", () => { assert.equal(isBackgroundTask(body, headers), true); }); +test("isBackgroundTask: detects X-Task-Type header", () => { + const body = { + model: "claude-sonnet-4", + messages: [{ role: "user", content: "hello" }], + }; + const headers = { "x-task-type": 
"background" }; + assert.equal(isBackgroundTask(body, headers), true); + assert.equal(getBackgroundTaskReason(body, headers), "header_background"); +}); + +test("isBackgroundTask: detects low max_tokens requests", () => { + const body = { + model: "claude-sonnet-4", + max_tokens: 32, + messages: [{ role: "user", content: "hello" }], + }; + assert.equal(isBackgroundTask(body), true); + assert.equal(getBackgroundTaskReason(body), "low_max_tokens"); +}); + test("isBackgroundTask: returns false for null/undefined body", () => { assert.equal(isBackgroundTask(null), false); assert.equal(isBackgroundTask(undefined), false); @@ -81,8 +102,8 @@ test("isBackgroundTask: returns false for empty messages", () => { test("getDegradedModel: returns cheaper model from map", () => { resetStats(); - assert.equal(getDegradedModel("claude-opus-4-6"), "gemini-2.5-flash"); - assert.equal(getDegradedModel("gemini-2.5-pro"), "gemini-2.5-flash"); + assert.equal(getDegradedModel("claude-opus-4-6"), "gemini-3-flash"); + assert.equal(getDegradedModel("gemini-2.5-pro"), "gemini-3-flash"); assert.equal(getDegradedModel("gpt-4o"), "gpt-4o-mini"); }); diff --git a/tests/unit/call-logs-requested-model.test.mjs b/tests/unit/call-logs-requested-model.test.mjs new file mode 100644 index 00000000..c94b55f4 --- /dev/null +++ b/tests/unit/call-logs-requested-model.test.mjs @@ -0,0 +1,52 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const TEST_DATA_DIR = fs.mkdtempSync(path.join(os.tmpdir(), "omniroute-calllogs-rm-")); +process.env.DATA_DIR = TEST_DATA_DIR; + +const core = await import("../../src/lib/db/core.ts"); +const callLogs = await import("../../src/lib/usage/callLogs.ts"); + +async function resetStorage() { + core.resetDbInstance(); + fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true }); + fs.mkdirSync(TEST_DATA_DIR, { recursive: true }); +} + +test.beforeEach(async () => { + 
await resetStorage(); +}); + +test.after(() => { + core.resetDbInstance(); + fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true }); +}); + +test("call logs persist requestedModel and allow filtering by requested model", async () => { + await callLogs.saveCallLog({ + method: "POST", + path: "/v1/chat/completions", + status: 200, + model: "openai/gpt-5.2-mini", + requestedModel: "openai/gpt-5.2-codex", + provider: "openai", + duration: 123, + requestBody: { messages: [{ role: "user", content: "hello" }] }, + responseBody: { id: "resp_1" }, + }); + + const all = await callLogs.getCallLogs({ limit: 10 }); + assert.equal(all.length, 1); + assert.equal(all[0].model, "openai/gpt-5.2-mini"); + assert.equal(all[0].requestedModel, "openai/gpt-5.2-codex"); + + const byRequested = await callLogs.getCallLogs({ model: "gpt-5.2-codex", limit: 10 }); + assert.equal(byRequested.length, 1); + assert.equal(byRequested[0].requestedModel, "openai/gpt-5.2-codex"); + + const detail = await callLogs.getCallLogById(all[0].id); + assert.equal(detail?.requestedModel, "openai/gpt-5.2-codex"); +}); diff --git a/tests/unit/fixes-p1.test.mjs b/tests/unit/fixes-p1.test.mjs index a672ab31..636543b0 100644 --- a/tests/unit/fixes-p1.test.mjs +++ b/tests/unit/fixes-p1.test.mjs @@ -3,6 +3,7 @@ import assert from "node:assert/strict"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; +import net from "node:net"; const isWindows = process.platform === "win32"; const TEST_DATA_DIR = fs.mkdtempSync(path.join(os.tmpdir(), "omniroute-fixes-")); @@ -342,11 +343,29 @@ test("proxy fetch rejects socks5 context when feature flag is disabled", async ( test("proxy fetch accepts socks5 context when feature flag is enabled", async () => { await withEnv("ENABLE_SOCKS5_PROXY", "true", async () => { - const result = await proxyFetch.runWithProxyContext( - { type: "socks5", host: "127.0.0.1", port: "1080" }, - async () => "ok" - ); - assert.equal(result, "ok"); + const server = 
net.createServer(); + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", resolve); + }); + + const address = server.address(); + assert.ok(address && typeof address === "object"); + + try { + const result = await proxyFetch.runWithProxyContext( + { type: "socks5", host: "127.0.0.1", port: String(address.port) }, + async () => "ok" + ); + assert.equal(result, "ok"); + } finally { + await new Promise((resolve, reject) => { + server.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); + } }); }); diff --git a/tests/unit/openai-to-claude-strip-empty.test.mjs b/tests/unit/openai-to-claude-strip-empty.test.mjs index 497948c1..b139853c 100644 --- a/tests/unit/openai-to-claude-strip-empty.test.mjs +++ b/tests/unit/openai-to-claude-strip-empty.test.mjs @@ -1,7 +1,7 @@ import test from "node:test"; import assert from "node:assert/strict"; -const { stripEmptyTextBlocks, openaiToClaudeRequest } = +const { stripEmptyTextBlocks, openaiToClaudeRequest, normalizeContentToString } = await import("../../open-sse/translator/request/openai-to-claude.ts"); test("stripEmptyTextBlocks removes empty text recursively inside tool_result content", () => { @@ -74,3 +74,34 @@ test("openaiToClaudeRequest applies strip to tool message array content", () => const toolResult = toolMessage.content.find((b) => b.type === "tool_result"); assert.deepEqual(toolResult.content, [{ type: "text", text: "tool ok" }]); }); + +test("T15: normalizeContentToString supports array-form content blocks", () => { + const text = normalizeContentToString([ + { type: "text", text: "line 1" }, + { type: "image_url", image_url: { url: "data:image/png;base64,abc" } }, + { type: "text", text: "line 2" }, + ]); + + assert.equal(text, "line 1\nline 2"); +}); + +test("T15: openaiToClaudeRequest converts system array content into a Claude system text block", () => { + const request = { + messages: [ + { + role: "system", + content: [ + { type: "text", 
text: "System rules A" }, + { type: "image_url", image_url: { url: "data:image/png;base64,abc" } }, + { type: "text", text: "System rules B" }, + ], + }, + { role: "user", content: "hello" }, + ], + }; + + const translated = openaiToClaudeRequest("claude-sonnet-4", request, false); + assert.ok(Array.isArray(translated.system)); + // system[0] is the injected Claude prompt; user-provided system content is system[1]. + assert.equal(translated.system[1].text, "System rules A\nSystem rules B"); +}); diff --git a/tests/unit/t07-no-log-key-config.test.mjs b/tests/unit/t07-no-log-key-config.test.mjs index a368e5c0..9eeaf922 100644 --- a/tests/unit/t07-no-log-key-config.test.mjs +++ b/tests/unit/t07-no-log-key-config.test.mjs @@ -59,6 +59,11 @@ test("updateKeyPermissionsSchema accepts noLog-only updates and rejects empty pa const noLogOnly = schemas.validateBody(schemas.updateKeyPermissionsSchema, { noLog: true }); assert.equal(noLogOnly.success, true); + const maxSessionsOnly = schemas.validateBody(schemas.updateKeyPermissionsSchema, { + maxSessions: 3, + }); + assert.equal(maxSessionsOnly.success, true); + const emptyPayload = schemas.validateBody(schemas.updateKeyPermissionsSchema, {}); assert.equal(emptyPayload.success, false); }); diff --git a/tests/unit/t12-pricing-updates.test.mjs b/tests/unit/t12-pricing-updates.test.mjs new file mode 100644 index 00000000..e9e511d4 --- /dev/null +++ b/tests/unit/t12-pricing-updates.test.mjs @@ -0,0 +1,34 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { getDefaultPricing } from "../../src/shared/constants/pricing.ts"; +import { REGISTRY } from "../../open-sse/config/providerRegistry.ts"; + +test("T12: pricing table includes MiniMax, GLM, Kimi and gpt-5.4 mini entries", () => { + const pricing = getDefaultPricing(); + + assert.ok(pricing.cx["gpt-5.4"], "missing cx/gpt-5.4"); + assert.ok(pricing.cx["gpt-5.4-mini"], "missing cx/gpt-5.4-mini"); + + assert.ok(pricing.minimax["minimax-m2.5"], 
"missing minimax/minimax-m2.5"); + assert.ok(pricing.minimax["minimax-m2.7"], "missing minimax/minimax-m2.7"); + assert.equal(pricing.minimax["minimax-m2.5"].input, 0.27); + assert.equal(pricing.minimax["minimax-m2.5"].output, 0.95); + + assert.ok(pricing.glm["glm-4.7"], "missing glm/glm-4.7"); + assert.ok(pricing.glm["glm-5"], "missing glm/glm-5"); + assert.equal(pricing.glm["glm-4.7"].input, 0.38); + assert.equal(pricing.glm["glm-4.7"].output, 1.98); + + assert.ok(pricing.kimi["kimi-k2.5"], "missing kimi/kimi-k2.5"); + assert.ok(pricing.kimi["kimi-k2.5-thinking"], "missing kimi/kimi-k2.5-thinking"); + assert.ok(pricing.kimi["kimi-for-coding"], "missing kimi/kimi-for-coding"); +}); + +test("T12: minimax default model list starts with M2.7", () => { + const minimaxModels = REGISTRY.minimax.models.map((m) => m.id); + const minimaxCnModels = REGISTRY["minimax-cn"].models.map((m) => m.id); + + assert.equal(minimaxModels[0], "minimax-m2.7"); + assert.equal(minimaxCnModels[0], "minimax-m2.7"); +}); diff --git a/tests/unit/t13-stale-quota-display.test.mjs b/tests/unit/t13-stale-quota-display.test.mjs new file mode 100644 index 00000000..57f5d144 --- /dev/null +++ b/tests/unit/t13-stale-quota-display.test.mjs @@ -0,0 +1,31 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { parseQuotaData } from "../../src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx"; + +test("T13: parseQuotaData zeroes usage when resetAt is already in the past", () => { + const past = new Date(Date.now() - 60_000).toISOString(); + const parsed = parseQuotaData("codex", { + quotas: { + session: { used: 83, total: 100, resetAt: past }, + }, + }); + + assert.equal(parsed.length, 1); + assert.equal(parsed[0].used, 0); + assert.equal(parsed[0].staleAfterReset, true); + assert.equal(parsed[0].remainingPercentage, 100); +}); + +test("T13: parseQuotaData keeps usage unchanged when resetAt is in the future", () => { + const future = new Date(Date.now() + 
60_000).toISOString(); + const parsed = parseQuotaData("codex", { + quotas: { + session: { used: 42, total: 100, resetAt: future }, + }, + }); + + assert.equal(parsed.length, 1); + assert.equal(parsed[0].used, 42); + assert.equal(parsed[0].staleAfterReset, false); +}); diff --git a/tests/unit/t14-proxy-fast-fail.test.mjs b/tests/unit/t14-proxy-fast-fail.test.mjs new file mode 100644 index 00000000..20a8b113 --- /dev/null +++ b/tests/unit/t14-proxy-fast-fail.test.mjs @@ -0,0 +1,35 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { + isProxyReachable, + getCachedProxyHealth, + invalidateProxyHealth, +} from "../../src/lib/proxyHealth.ts"; +import { runWithProxyContext } from "../../open-sse/utils/proxyFetch.ts"; + +test("T14: isProxyReachable caches unreachable proxy result", async () => { + const proxyUrl = "http://127.0.0.1:1"; + invalidateProxyHealth(proxyUrl); + + const healthy = await isProxyReachable(proxyUrl, 120, 2_000); + assert.equal(healthy, false); + assert.equal(getCachedProxyHealth(proxyUrl), false); +}); + +test("T14: runWithProxyContext fast-fails when proxy is unreachable", async () => { + const proxyUrl = "http://127.0.0.1:1"; + invalidateProxyHealth(proxyUrl); + + let executed = false; + await assert.rejects( + () => + runWithProxyContext(proxyUrl, async () => { + executed = true; + return "ok"; + }), + (err) => err?.code === "PROXY_UNREACHABLE" + ); + + assert.equal(executed, false); +}); diff --git a/tests/unit/t16-gemini-enum-type-string.test.mjs b/tests/unit/t16-gemini-enum-type-string.test.mjs new file mode 100644 index 00000000..c921ee52 --- /dev/null +++ b/tests/unit/t16-gemini-enum-type-string.test.mjs @@ -0,0 +1,53 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { cleanJSONSchemaForAntigravity } = + await import("../../open-sse/translator/helpers/geminiHelper.ts"); + +test("T16: enum-only fields gain type:string after Gemini schema cleanup", () => { + const schema = 
{ + type: "object", + properties: { + mode: { + enum: ["fast", "balanced", "slow"], + }, + }, + required: ["mode"], + }; + + const cleaned = cleanJSONSchemaForAntigravity(schema); + assert.equal(cleaned.properties.mode.type, "string"); + assert.deepEqual(cleaned.properties.mode.enum, ["fast", "balanced", "slow"]); +}); + +test("T16: existing explicit type:string is preserved", () => { + const schema = { + type: "object", + properties: { + mode: { + type: "string", + enum: ["auto", "manual"], + }, + }, + }; + + const cleaned = cleanJSONSchemaForAntigravity(schema); + assert.equal(cleaned.properties.mode.type, "string"); + assert.deepEqual(cleaned.properties.mode.enum, ["auto", "manual"]); +}); + +test("T16: schemas without enum are not forced to string", () => { + const schema = { + type: "object", + properties: { + retries: { + type: "number", + minimum: 0, + }, + }, + }; + + const cleaned = cleanJSONSchemaForAntigravity(schema); + assert.equal(cleaned.properties.retries.type, "number"); + assert.equal(cleaned.properties.retries.enum, undefined); +}); diff --git a/tests/unit/t19-codex-responses-empty-content.test.mjs b/tests/unit/t19-codex-responses-empty-content.test.mjs new file mode 100644 index 00000000..cc488073 --- /dev/null +++ b/tests/unit/t19-codex-responses-empty-content.test.mjs @@ -0,0 +1,66 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { translateNonStreamingResponse } = + await import("../../open-sse/handlers/responseTranslator.ts"); +const { FORMATS } = await import("../../open-sse/translator/formats.ts"); + +test("T19: picks the last non-empty message content from Responses output", () => { + const responseBody = { + object: "response", + id: "resp_t19", + model: "gpt-5.2-codex", + created_at: 1710000000, + output: [ + { + type: "message", + content: [{ type: "output_text", text: "" }], + }, + { + type: "reasoning", + summary: [{ type: "summary_text", text: "thinking..." 
}], + }, + { + type: "message", + content: [{ type: "output_text", text: "Resposta final" }], + }, + ], + usage: { input_tokens: 10, output_tokens: 5 }, + }; + + const translated = translateNonStreamingResponse( + responseBody, + FORMATS.OPENAI_RESPONSES, + FORMATS.OPENAI + ); + + assert.equal(translated.choices[0].message.content, "Resposta final"); +}); + +test("T19: falls back to last message block when all message texts are empty", () => { + const responseBody = { + object: "response", + id: "resp_t19_empty", + model: "gpt-5.2-codex", + created_at: 1710000001, + output: [ + { + type: "message", + content: [{ type: "output_text", text: "" }], + }, + { + type: "message", + content: [{ type: "output_text", text: "" }], + }, + ], + }; + + const translated = translateNonStreamingResponse( + responseBody, + FORMATS.OPENAI_RESPONSES, + FORMATS.OPENAI + ); + + assert.equal(translated.choices[0].message.content, ""); + assert.equal(translated.choices[0].finish_reason, "stop"); +}); diff --git a/tests/unit/t20-t22-provider-headers.test.mjs b/tests/unit/t20-t22-provider-headers.test.mjs new file mode 100644 index 00000000..58325929 --- /dev/null +++ b/tests/unit/t20-t22-provider-headers.test.mjs @@ -0,0 +1,31 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { platform, arch } from "node:os"; + +const { REGISTRY } = await import("../../open-sse/config/providerRegistry.ts"); + +test("T20: antigravity config has updated User-Agent and sandbox fallback URL", () => { + const antigravity = REGISTRY.antigravity; + assert.ok(Array.isArray(antigravity.baseUrls)); + assert.ok(antigravity.baseUrls.includes("https://daily-cloudcode-pa.sandbox.googleapis.com")); + assert.match( + antigravity.headers["User-Agent"], + new RegExp(`^antigravity/1\\.107\\.0\\s+${platform()}\\/${arch()}$`) + ); +}); + +test("T22: github headers include updated editor/plugin versions and required fields", () => { + const github = REGISTRY.github; + 
assert.equal(github.headers["editor-version"], "vscode/1.110.0"); + assert.equal(github.headers["editor-plugin-version"], "copilot-chat/0.38.0"); + assert.equal(github.headers["user-agent"], "GitHubCopilotChat/0.38.0"); + assert.equal(github.headers["x-github-api-version"], "2025-04-01"); + assert.equal(github.headers["x-vscode-user-agent-library-version"], "electron-fetch"); + assert.equal(github.headers["X-Initiator"], "user"); +}); + +test("T22: github config exposes dedicated responses endpoint", () => { + const github = REGISTRY.github; + assert.equal(github.responsesBaseUrl, "https://api.githubcopilot.com/responses"); + assert.equal(github.baseUrl, "https://api.githubcopilot.com/chat/completions"); +}); diff --git a/tests/unit/t23-t24-fallback-resilience.test.mjs b/tests/unit/t23-t24-fallback-resilience.test.mjs new file mode 100644 index 00000000..55b6e22d --- /dev/null +++ b/tests/unit/t23-t24-fallback-resilience.test.mjs @@ -0,0 +1,141 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { checkFallbackError } = await import("../../open-sse/services/accountFallback.ts"); +const { handleComboChat } = await import("../../open-sse/services/combo.ts"); +const { resetAllCircuitBreakers } = await import("../../src/shared/utils/circuitBreaker.ts"); + +test.beforeEach(() => { + resetAllCircuitBreakers(); +}); + +function createLog() { + const entries = []; + return { + info: (tag, msg) => entries.push({ level: "info", tag, msg }), + warn: (tag, msg) => entries.push({ level: "warn", tag, msg }), + error: (tag, msg) => entries.push({ level: "error", tag, msg }), + entries, + }; +} + +function createStatusSequenceHandler(sequence) { + let idx = 0; + return async () => { + const step = sequence[idx++] || { status: 200 }; + if (step.status === 200) { + return new Response(JSON.stringify({ ok: true }), { status: 200 }); + } + return new Response( + JSON.stringify({ + error: { message: step.message || `Error ${step.status}` }, + }), + { + 
status: step.status, + headers: step.headers || { "content-type": "application/json" }, + } + ); + }; +} + +test("T23: 429 with long Retry-After uses real reset cooldown instead of short exponential backoff", () => { + const headers = new Headers({ "retry-after": "3600" }); + const result = checkFallbackError(429, "Rate limit exceeded", 2, null, "groq", headers); + + assert.equal(result.shouldFallback, true); + assert.equal(result.reason, "rate_limit_exceeded"); + assert.equal(result.newBackoffLevel, 0); + assert.ok(result.cooldownMs > 3_590_000); +}); + +test("T24: combo awaits short 503 cooldown before falling through to next model", async () => { + const log = createLog(); + + const result = await handleComboChat({ + body: {}, + combo: { + name: "t24-short-cooldown", + strategy: "priority", + models: [ + { model: "groq/model-a", weight: 0 }, + { model: "groq/model-b", weight: 0 }, + ], + }, + // Two transient failures on first model, then success on fallback model. + handleSingleModel: createStatusSequenceHandler([ + { status: 503 }, + { status: 503 }, + { status: 200 }, + ]), + isModelAvailable: () => true, + log, + settings: null, + allCombos: null, + }); + + assert.equal(result.ok, true); + const waitLog = log.entries.find((e) => e.msg.includes("Waiting") && e.msg.includes("fallback")); + assert.ok(waitLog); +}); + +test("T24: combo skips wait when 503 cooldown is long (>5s)", async () => { + const log = createLog(); + + const result = await handleComboChat({ + body: {}, + combo: { + name: "t24-long-cooldown", + strategy: "priority", + models: [ + { model: "groq/model-a", weight: 0 }, + { model: "groq/model-b", weight: 0 }, + ], + }, + handleSingleModel: createStatusSequenceHandler([ + { + status: 503, + message: "rate limit exceeded", + headers: { "content-type": "application/json", "retry-after": "120" }, + }, + { + status: 503, + message: "rate limit exceeded", + headers: { "content-type": "application/json", "retry-after": "120" }, + }, + { status: 200 }, 
+ ]), + isModelAvailable: () => true, + log, + settings: null, + allCombos: null, + }); + + assert.equal(result.ok, true); + const waitLog = log.entries.find((e) => e.msg.includes("Waiting") && e.msg.includes("fallback")); + assert.equal(waitLog, undefined); +}); + +test("T24: all inactive accounts return 503 service_unavailable (not 406)", async () => { + const result = await handleComboChat({ + body: {}, + combo: { + name: "t24-all-inactive", + strategy: "priority", + models: [ + { model: "groq/model-a", weight: 0 }, + { model: "groq/model-b", weight: 0 }, + ], + }, + handleSingleModel: async () => { + throw new Error("handleSingleModel should not be called when all models are unavailable"); + }, + isModelAvailable: () => false, + log: createLog(), + settings: null, + allCombos: null, + }); + + assert.equal(result.status, 503); + const body = await result.json(); + assert.equal(body.error?.code, "ALL_ACCOUNTS_INACTIVE"); +}); diff --git a/tests/unit/t25-provider-validation-modelid-fallback.test.mjs b/tests/unit/t25-provider-validation-modelid-fallback.test.mjs new file mode 100644 index 00000000..7912d30d --- /dev/null +++ b/tests/unit/t25-provider-validation-modelid-fallback.test.mjs @@ -0,0 +1,116 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { validateProviderApiKey } = await import("../../src/lib/providers/validation.ts"); + +test("T25: openai-compatible validation succeeds directly when /models works", async () => { + const originalFetch = globalThis.fetch; + const calls = []; + + globalThis.fetch = async (url) => { + calls.push(String(url)); + return new Response(JSON.stringify({ data: [] }), { status: 200 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-models-ok", + apiKey: "sk-test", + providerSpecificData: { baseUrl: "https://api.example.com/v1" }, + }); + + assert.equal(result.valid, true); + assert.equal(result.method, "models_endpoint"); + 
assert.equal(calls.length, 1); + assert.equal(calls[0], "https://api.example.com/v1/models"); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test("T25: /models unavailable without Model ID returns actionable guidance", async () => { + const originalFetch = globalThis.fetch; + let callCount = 0; + + globalThis.fetch = async () => { + callCount += 1; + return new Response(JSON.stringify({ error: "Not Found" }), { status: 404 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-no-model-id", + apiKey: "sk-test", + providerSpecificData: { baseUrl: "https://api.example.com/v1" }, + }); + + assert.equal(result.valid, false); + assert.match(result.error, /Provide a Model ID/i); + // Must stop after /models when no custom model was provided. + assert.equal(callCount, 1); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test("T25: fallback chat probe detects invalid credentials with custom Model ID", async () => { + const originalFetch = globalThis.fetch; + const calls = []; + + globalThis.fetch = async (url) => { + calls.push(String(url)); + if (String(url).endsWith("/models")) { + return new Response(JSON.stringify({ error: "Not Found" }), { status: 404 }); + } + return new Response(JSON.stringify({ error: "Unauthorized" }), { status: 401 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-auth", + apiKey: "bad-key", + providerSpecificData: { + baseUrl: "https://api.example.com/v1", + validationModelId: "grok-3", + }, + }); + + assert.equal(result.valid, false); + assert.equal(result.error, "Invalid API key"); + assert.deepEqual(calls, [ + "https://api.example.com/v1/models", + "https://api.example.com/v1/chat/completions", + ]); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test("T25: fallback chat probe treats 429 as valid credentials with warning", async () => { + const originalFetch = globalThis.fetch; + + 
globalThis.fetch = async (url) => { + if (String(url).endsWith("/models")) { + throw new Error("connect ECONNREFUSED"); + } + return new Response(JSON.stringify({ error: "Rate limited" }), { status: 429 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-rate-limit", + apiKey: "sk-test", + providerSpecificData: { + baseUrl: "https://api.example.com/v1", + validationModelId: "meta-llama/Llama-3.1-8B-Instruct", + }, + }); + + assert.equal(result.valid, true); + assert.equal(result.error, null); + assert.equal(result.method, "chat_completions"); + assert.match(result.warning, /Rate limited/i); + } finally { + globalThis.fetch = originalFetch; + } +}); diff --git a/tests/unit/t26-ai-sdk-accept-header-compat.test.mjs b/tests/unit/t26-ai-sdk-accept-header-compat.test.mjs new file mode 100644 index 00000000..9f9d44e8 --- /dev/null +++ b/tests/unit/t26-ai-sdk-accept-header-compat.test.mjs @@ -0,0 +1,30 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { clientWantsJsonResponse, resolveStreamFlag, stripMarkdownCodeFence } = + await import("../../open-sse/utils/aiSdkCompat.ts"); + +test("T26: Accept application/json disables SSE stream mode", () => { + assert.equal(clientWantsJsonResponse("application/json"), true); + assert.equal(resolveStreamFlag(true, "application/json"), false); +}); + +test("T26: text/event-stream keeps SSE behavior", () => { + assert.equal(clientWantsJsonResponse("text/event-stream"), false); + assert.equal(resolveStreamFlag(true, "text/event-stream"), true); +}); + +test("T26: mixed Accept header prefers SSE only when text/event-stream is present", () => { + assert.equal(clientWantsJsonResponse("application/json, text/event-stream"), false); + assert.equal(resolveStreamFlag(true, "application/json, text/event-stream"), true); +}); + +test("T26: markdown code fence stripping unwraps Claude JSON blocks", () => { + const wrapped = 
'```json\n{"name":"omniroute"}\n```'; + assert.equal(stripMarkdownCodeFence(wrapped), '{"name":"omniroute"}'); +}); + +test("T26: non-fenced content is returned unchanged", () => { + const plain = '{"name":"omniroute"}'; + assert.equal(stripMarkdownCodeFence(plain), plain); +}); diff --git a/tests/unit/t27-github-copilot-response-format.test.mjs b/tests/unit/t27-github-copilot-response-format.test.mjs new file mode 100644 index 00000000..61841358 --- /dev/null +++ b/tests/unit/t27-github-copilot-response-format.test.mjs @@ -0,0 +1,84 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { GithubExecutor } = await import("../../open-sse/executors/github.ts"); +const { BaseExecutor } = await import("../../open-sse/executors/base.ts"); + +function streamFromChunks(chunks) { + const encoder = new TextEncoder(); + return new ReadableStream({ + start(controller) { + for (const chunk of chunks) { + controller.enqueue(encoder.encode(chunk)); + } + controller.close(); + }, + }); +} + +test("T27: Claude + response_format=json_object injects system instruction and strips response_format field", () => { + const executor = new GithubExecutor(); + const request = { + messages: [{ role: "user", content: "return json" }], + response_format: { type: "json_object" }, + }; + + const transformed = executor.transformRequest("claude-sonnet-4.5", request, false, {}); + + assert.equal(transformed.response_format, undefined); + assert.equal(transformed.messages[0].role, "system"); + assert.match( + transformed.messages[0].content, + /Respond only with valid JSON\. 
Do not include any text/i + ); +}); + +test("T27: non-Claude models keep response_format untouched", () => { + const executor = new GithubExecutor(); + const request = { + messages: [{ role: "user", content: "hello" }], + response_format: { type: "json_object" }, + }; + + const transformed = executor.transformRequest("gpt-4o", request, false, {}); + assert.deepEqual(transformed.response_format, { type: "json_object" }); +}); + +test("T27: SSE [DONE] guard applies only in streaming mode", async () => { + const executor = new GithubExecutor(); + const originalExecute = BaseExecutor.prototype.execute; + + BaseExecutor.prototype.execute = async () => ({ + response: new Response( + streamFromChunks(['data: {"delta":"hello"}\n\n', "data: [DONE]\n\n", "data: tail\n\n"]), + { + status: 200, + headers: { "content-type": "text/event-stream" }, + } + ), + url: "https://api.githubcopilot.com/chat/completions", + }); + + try { + const streamingResult = await executor.execute({ + model: "claude-sonnet-4.5", + body: { messages: [] }, + stream: true, + credentials: { accessToken: "token" }, + }); + const streamingText = await streamingResult.response.text(); + assert.equal(streamingText.includes("data: [DONE]"), false); + assert.equal(streamingText.includes("data: tail"), true); + + const nonStreamingResult = await executor.execute({ + model: "claude-sonnet-4.5", + body: { messages: [] }, + stream: false, + credentials: { accessToken: "token" }, + }); + const nonStreamingText = await nonStreamingResult.response.text(); + assert.equal(nonStreamingText.includes("data: [DONE]"), true); + } finally { + BaseExecutor.prototype.execute = originalExecute; + } +}); diff --git a/tests/unit/t28-model-catalog-updates.test.mjs b/tests/unit/t28-model-catalog-updates.test.mjs new file mode 100644 index 00000000..22ab769a --- /dev/null +++ b/tests/unit/t28-model-catalog-updates.test.mjs @@ -0,0 +1,41 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { 
getModelInfoCore } from "../../open-sse/services/model.ts"; +import { REGISTRY } from "../../open-sse/config/providerRegistry.ts"; + +test("T28: gemini catalog includes preview models from 9router", () => { + const geminiIds = REGISTRY.gemini.models.map((m) => m.id); + const geminiCliIds = REGISTRY["gemini-cli"].models.map((m) => m.id); + + assert.ok(geminiIds.includes("gemini-3.1-flash-lite-preview")); + assert.ok(geminiIds.includes("gemini-3-flash-preview")); + assert.ok(geminiCliIds.includes("gemini-3.1-flash-lite-preview")); + assert.ok(geminiCliIds.includes("gemini-3-flash-preview")); +}); + +test("T28: vertex catalog includes partner models when vertex executor is available", () => { + const vertexIds = REGISTRY.vertex.models.map((m) => m.id); + + assert.ok(vertexIds.includes("deepseek-v3.2")); + assert.ok(vertexIds.includes("qwen3-next-80b")); + assert.ok(vertexIds.includes("glm-5")); +}); + +test("T28: new catalog models resolve through getModelInfoCore", async () => { + const minimax = await getModelInfoCore("minimax/minimax-m2.7", {}); + assert.equal(minimax.provider, "minimax"); + assert.equal(minimax.model, "minimax-m2.7"); + + const flashLite = await getModelInfoCore("gemini/gemini-3.1-flash-lite-preview", {}); + assert.equal(flashLite.provider, "gemini"); + assert.equal(flashLite.model, "gemini-3.1-flash-lite-preview"); + + const flashPreview = await getModelInfoCore("gemini/gemini-3-flash-preview", {}); + assert.equal(flashPreview.provider, "gemini"); + assert.equal(flashPreview.model, "gemini-3-flash-preview"); + + const vertexPartner = await getModelInfoCore("vertex/qwen3-next-80b", {}); + assert.equal(vertexPartner.provider, "vertex"); + assert.equal(vertexPartner.model, "qwen3-next-80b"); +}); diff --git a/tests/unit/t29-vertex-sa-json-executor.test.mjs b/tests/unit/t29-vertex-sa-json-executor.test.mjs new file mode 100644 index 00000000..47cc63ab --- /dev/null +++ b/tests/unit/t29-vertex-sa-json-executor.test.mjs @@ -0,0 +1,71 @@ +import test 
from "node:test"; +import assert from "node:assert/strict"; + +const { VertexExecutor } = await import("../../open-sse/executors/vertex.ts"); + +const MIN_SA_JSON = JSON.stringify({ + project_id: "vertex-project-123", +}); + +test("T29: Vertex executor builds regional Gemini URL from Service Account project", () => { + const executor = new VertexExecutor(); + const url = executor.buildUrl("gemini-3.1-pro-preview", true, 0, { + apiKey: MIN_SA_JSON, + providerSpecificData: { region: "europe-west4" }, + }); + + assert.equal( + url, + "https://aiplatform.googleapis.com/v1/projects/vertex-project-123/locations/europe-west4/publishers/google/models/gemini-3.1-pro-preview:streamGenerateContent?alt=sse" + ); +}); + +test("T29: Vertex executor routes partner models to global openapi endpoint", () => { + const executor = new VertexExecutor(); + const url = executor.buildUrl("deepseek-v3.2", false, 0, { + apiKey: MIN_SA_JSON, + providerSpecificData: { region: "us-central1" }, + }); + + assert.equal( + url, + "https://aiplatform.googleapis.com/v1/projects/vertex-project-123/locations/global/endpoints/openapi/chat/completions" + ); +}); + +test("T29: Vertex executor defaults region to us-central1 when not configured", () => { + const executor = new VertexExecutor(); + const url = executor.buildUrl("gemini-2.5-flash", false, 0, { + apiKey: MIN_SA_JSON, + providerSpecificData: {}, + }); + + assert.equal( + url, + "https://aiplatform.googleapis.com/v1/projects/vertex-project-123/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent" + ); +}); + +test("T29: Vertex executor headers include Bearer token and SSE Accept when streaming", () => { + const executor = new VertexExecutor(); + const headers = executor.buildHeaders({ accessToken: "ya29.test-token" }, true); + + assert.equal(headers["Content-Type"], "application/json"); + assert.equal(headers.Authorization, "Bearer ya29.test-token"); + assert.equal(headers.Accept, "text/event-stream"); +}); + 
+test("T29: Vertex executor rejects invalid Service Account JSON clearly", async () => { + const executor = new VertexExecutor(); + + await assert.rejects( + () => + executor.execute({ + model: "gemini-2.5-flash", + body: { contents: [] }, + stream: false, + credentials: { apiKey: "not-json" }, + }), + /Service Account JSON/i + ); +}); diff --git a/tests/unit/t30-kiro-400-model-unavailable.test.mjs b/tests/unit/t30-kiro-400-model-unavailable.test.mjs new file mode 100644 index 00000000..59237164 --- /dev/null +++ b/tests/unit/t30-kiro-400-model-unavailable.test.mjs @@ -0,0 +1,29 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { isModelUnavailableError, getNextFamilyFallback } = + await import("../../open-sse/services/modelFamilyFallback.ts"); + +test("T30: Kiro 'improperly formed request' 400 is treated as model-unavailable", () => { + const unavailable = isModelUnavailableError( + 400, + "Bad Request: improperly formed request for selected model" + ); + assert.equal(unavailable, true); +}); + +test("T30: generic 400 without model-unavailable signal is not treated as unavailable", () => { + const unavailable = isModelUnavailableError(400, "Bad Request: malformed JSON body"); + assert.equal(unavailable, false); +}); + +test("T30: 404 still maps to model-unavailable", () => { + const unavailable = isModelUnavailableError(404, "not found"); + assert.equal(unavailable, true); +}); + +test("T30: model family helper returns a sibling candidate when available", () => { + const next = getNextFamilyFallback("gemini-3.1-pro-high", new Set(["gemini-3.1-pro-high"])); + assert.equal(typeof next, "string"); + assert.notEqual(next, "gemini-3.1-pro-high"); +}); diff --git a/tests/unit/t31-t33-t34-t38-model-specs.test.mjs b/tests/unit/t31-t33-t34-t38-model-specs.test.mjs new file mode 100644 index 00000000..8a2257bc --- /dev/null +++ b/tests/unit/t31-t33-t34-t38-model-specs.test.mjs @@ -0,0 +1,53 @@ +import test from "node:test"; +import assert 
from "node:assert/strict"; + +const { REGISTRY } = await import("../../open-sse/config/providerRegistry.ts"); +const { resolveModelAlias: resolveDeprecatedAlias } = + await import("../../open-sse/services/modelDeprecation.ts"); +const { normalizeThinkingLevel } = await import("../../open-sse/services/thinkingBudget.ts"); +const { + MODEL_SPECS, + getModelSpec, + capMaxOutputTokens, + resolveModelAlias, + getDefaultThinkingBudget, + capThinkingBudget, +} = await import("../../src/shared/constants/modelSpecs.ts"); + +test("T31: registry exposes Gemini 3.1 Pro High/Low model IDs", () => { + const geminiIds = REGISTRY.gemini.models.map((m) => m.id); + assert.ok(geminiIds.includes("gemini-3.1-pro-high")); + assert.ok(geminiIds.includes("gemini-3.1-pro-low")); +}); + +test("T31: legacy Gemini aliases resolve to Gemini 3.1 IDs", () => { + assert.equal(resolveDeprecatedAlias("gemini-3-pro-high"), "gemini-3.1-pro-high"); + assert.equal(resolveDeprecatedAlias("gemini-3-pro-low"), "gemini-3.1-pro-low"); +}); + +test("T33: thinkingLevel string is converted into numeric thinkingBudget", () => { + const converted = normalizeThinkingLevel({ + model: "gemini-3.1-pro-high", + generationConfig: { + thinkingConfig: { thinkingLevel: "HIGH" }, + }, + }); + + assert.equal(converted.generationConfig.thinkingConfig.thinkingBudget, 24576); + assert.equal(converted.generationConfig.thinkingConfig.thinkingLevel, undefined); +}); + +test("T34: max output tokens are capped by model spec", () => { + assert.equal(capMaxOutputTokens("gemini-3-flash", 131072), 65536); + assert.equal(capMaxOutputTokens("gemini-3-flash"), 65536); + assert.equal(capMaxOutputTokens("gemini-3.1-pro-high", 131072), 131072); +}); + +test("T38: modelSpecs exposes centralized helpers with alias and prefix lookup", () => { + assert.equal(typeof MODEL_SPECS["gemini-3.1-pro-high"], "object"); + assert.equal(getModelSpec("gemini-3-pro-high").maxOutputTokens, 131072); + 
assert.equal(getModelSpec("gemini-3-flash-preview").maxOutputTokens, 65536); + assert.equal(resolveModelAlias("gemini-3-pro-low"), "gemini-3.1-pro-low"); + assert.equal(getDefaultThinkingBudget("gemini-3.1-pro-high"), 24576); + assert.equal(capThinkingBudget("gemini-3.1-pro-low", 50000), 16000); +}); diff --git a/tests/unit/t40-opencode-cli-tools-integration.test.mjs b/tests/unit/t40-opencode-cli-tools-integration.test.mjs new file mode 100644 index 00000000..dd903beb --- /dev/null +++ b/tests/unit/t40-opencode-cli-tools-integration.test.mjs @@ -0,0 +1,67 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; + +const { CLI_TOOLS } = await import("../../src/shared/constants/cliTools.ts"); +const { resolveOpencodeConfigPath } = await import("../../src/shared/services/cliRuntime.ts"); +const { buildOpenCodeProviderConfig, mergeOpenCodeConfig } = + await import("../../src/shared/services/opencodeConfig.ts"); + +test("T40: OpenCode card documents config paths and --variant usage", () => { + const opencode = CLI_TOOLS.opencode; + assert.ok(opencode, "OpenCode tool card must exist"); + + const notesText = (opencode.notes || []) + .map((note) => note?.text || "") + .join(" ") + .toLowerCase(); + + assert.match(notesText, /\.config\/opencode\/opencode\.json/); + assert.match(notesText, /%appdata%/); + assert.match(notesText, /--variant/); +}); + +test("T40: OpenCode config path resolves per-platform", () => { + const linuxWithXdg = resolveOpencodeConfigPath( + "linux", + { XDG_CONFIG_HOME: "/tmp/xdg-config-home" }, + "/home/dev" + ); + assert.equal(linuxWithXdg, path.join("/tmp/xdg-config-home", "opencode", "opencode.json")); + + const linuxDefault = resolveOpencodeConfigPath("linux", {}, "/home/dev"); + assert.equal(linuxDefault, path.join("/home/dev", ".config", "opencode", "opencode.json")); + + const windowsPath = resolveOpencodeConfigPath( + "win32", + { APPDATA: "C:\\Users\\dev\\AppData\\Roaming" }, + 
"C:\\Users\\dev" + ); + assert.equal( + windowsPath, + path.join("C:\\Users\\dev\\AppData\\Roaming", "opencode", "opencode.json") + ); +}); + +test("T40: OpenCode config generator includes endpoint and selected API key", () => { + const providerConfig = buildOpenCodeProviderConfig({ + baseUrl: "http://localhost:20128/v1/", + apiKey: "sk_test_opencode", + model: "claude-sonnet-4-5-thinking", + }); + assert.equal(providerConfig.baseURL, "http://localhost:20128/v1"); + assert.equal(providerConfig.apiKey, "sk_test_opencode"); + assert.ok(providerConfig.models.includes("claude-sonnet-4-5-thinking")); + + const mergedConfig = mergeOpenCodeConfig( + { providers: { custom: { name: "Custom Provider" } } }, + { + baseUrl: "http://localhost:20128/v1", + apiKey: "sk_test_opencode", + model: "claude-sonnet-4-5-thinking", + } + ); + assert.ok(mergedConfig.providers.custom); + assert.equal(mergedConfig.providers.omniroute.baseURL, "http://localhost:20128/v1"); + assert.equal(mergedConfig.providers.omniroute.apiKey, "sk_test_opencode"); +}); diff --git a/tests/unit/t42-image-size-to-aspect-ratio.test.mjs b/tests/unit/t42-image-size-to-aspect-ratio.test.mjs new file mode 100644 index 00000000..4755a53c --- /dev/null +++ b/tests/unit/t42-image-size-to-aspect-ratio.test.mjs @@ -0,0 +1,96 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { mapImageSize } = await import("../../open-sse/translator/image/sizeMapper.ts"); +const { handleImageGeneration } = await import("../../open-sse/handlers/imageGeneration.ts"); +const { IMAGE_PROVIDERS } = await import("../../open-sse/config/imageRegistry.ts"); + +test("T42: size mapper converts OpenAI sizes and preserves direct aspect ratios", () => { + assert.equal(mapImageSize("1024x1024"), "1:1"); + assert.equal(mapImageSize("1792x1024"), "16:9"); + assert.equal(mapImageSize("16:9"), "16:9"); + assert.equal(mapImageSize("333x777"), "1:1"); + assert.equal(mapImageSize(undefined), "1:1"); +}); + +test("T42: Imagen3 
requests send mapped aspect_ratio and normalize to OpenAI response shape", async () => { + const testProviderId = "t42-imagen3"; + const originalProvider = IMAGE_PROVIDERS[testProviderId]; + const originalFetch = globalThis.fetch; + let capturedRequestBody = null; + + IMAGE_PROVIDERS[testProviderId] = { + id: testProviderId, + baseUrl: "https://example.com/imagen3", + authType: "apikey", + authHeader: "bearer", + format: "imagen3", + models: [{ id: "test-model", name: "Test Imagen3" }], + supportedSizes: ["1024x1024", "1792x1024", "16:9"], + }; + + globalThis.fetch = async (_url, options = {}) => { + capturedRequestBody = JSON.parse(String(options.body || "{}")); + return new Response( + JSON.stringify({ + images: [{ image: "ZmFrZS1pbWFnZS1iYXNlNjQ=" }], + }), + { + status: 200, + headers: { "content-type": "application/json" }, + } + ); + }; + + try { + const resultLandscape = await handleImageGeneration({ + body: { + model: `${testProviderId}/test-model`, + prompt: "a mountain at sunrise", + size: "1792x1024", + n: 1, + }, + credentials: { apiKey: "test-key" }, + log: { info: () => {}, error: () => {} }, + }); + + assert.equal(capturedRequestBody.aspect_ratio, "16:9"); + assert.equal(resultLandscape.success, true); + assert.ok(Number.isFinite(resultLandscape.data.created)); + assert.ok(Array.isArray(resultLandscape.data.data)); + assert.equal(resultLandscape.data.data[0].b64_json, "ZmFrZS1pbWFnZS1iYXNlNjQ="); + + const resultDirectRatio = await handleImageGeneration({ + body: { + model: `${testProviderId}/test-model`, + prompt: "portrait photo", + size: "16:9", + n: 1, + }, + credentials: { apiKey: "test-key" }, + log: { info: () => {}, error: () => {} }, + }); + assert.equal(capturedRequestBody.aspect_ratio, "16:9"); + assert.equal(resultDirectRatio.success, true); + + const resultFallback = await handleImageGeneration({ + body: { + model: `${testProviderId}/test-model`, + prompt: "abstract art", + size: "333x777", + n: 1, + }, + credentials: { apiKey: 
"test-key" }, + log: { info: () => {}, error: () => {} }, + }); + assert.equal(capturedRequestBody.aspect_ratio, "1:1"); + assert.equal(resultFallback.success, true); + } finally { + globalThis.fetch = originalFetch; + if (originalProvider) { + IMAGE_PROVIDERS[testProviderId] = originalProvider; + } else { + delete IMAGE_PROVIDERS[testProviderId]; + } + } +}); diff --git a/tests/unit/thinking-budget.test.mjs b/tests/unit/thinking-budget.test.mjs index 68c2ce73..ee7cdcfa 100644 --- a/tests/unit/thinking-budget.test.mjs +++ b/tests/unit/thinking-budget.test.mjs @@ -169,9 +169,9 @@ test("EFFORT_BUDGETS has expected keys", () => { test("THINKING_LEVEL_MAP has all expected levels", () => { assert.equal(THINKING_LEVEL_MAP.none, 0); - assert.equal(THINKING_LEVEL_MAP.low, 1024); - assert.equal(THINKING_LEVEL_MAP.medium, 10240); - assert.equal(THINKING_LEVEL_MAP.high, 131072); + assert.equal(THINKING_LEVEL_MAP.low, 4096); + assert.equal(THINKING_LEVEL_MAP.medium, 8192); + assert.equal(THINKING_LEVEL_MAP.high, 24576); }); test("normalizeThinkingLevel: converts thinkingLevel 'high' to budget", () => { @@ -182,7 +182,7 @@ test("normalizeThinkingLevel: converts thinkingLevel 'high' to budget", () => { }; const result = normalizeThinkingLevel(body); assert.equal(result.thinking.type, "enabled"); - assert.equal(result.thinking.budget_tokens, 131072); + assert.equal(result.thinking.budget_tokens, 24576); assert.equal(result.thinkingLevel, undefined); }); @@ -194,7 +194,7 @@ test("normalizeThinkingLevel: converts thinking_level 'low' to budget", () => { }; const result = normalizeThinkingLevel(body); assert.equal(result.thinking.type, "enabled"); - assert.equal(result.thinking.budget_tokens, 1024); + assert.equal(result.thinking.budget_tokens, 4096); assert.equal(result.thinking_level, undefined); }); @@ -213,7 +213,7 @@ test("normalizeThinkingLevel: converts Gemini thinkingConfig.thinkingLevel", () }, }; const result = normalizeThinkingLevel(body); - 
assert.equal(result.generationConfig.thinkingConfig.thinkingBudget, 131072); + assert.equal(result.generationConfig.thinkingConfig.thinkingBudget, 24576); assert.equal(result.generationConfig.thinking_config, undefined); }); @@ -269,7 +269,7 @@ test("applyThinkingBudget: thinkingLevel 'high' + PASSTHROUGH = converts and pas messages: [{ role: "user", content: "hello" }], }; const result = applyThinkingBudget(body); - assert.equal(result.thinking.budget_tokens, 131072); + assert.equal(result.thinking.budget_tokens, 24576); assert.equal(result.thinkingLevel, undefined); setThinkingBudgetConfig(DEFAULT_THINKING_CONFIG); });