From c34b3f41bdc904bd3e48dff50a3ea3a22ff42c11 Mon Sep 17 00:00:00 2001 From: diegosouzapw Date: Mon, 23 Mar 2026 11:08:14 -0300 Subject: [PATCH] feat: Add requested model to logs, enhance background task detection, and introduce AI SDK compatibility utilities. --- docs/API_REFERENCE.md | 23 +-- docs/USER_GUIDE.md | 16 ++ open-sse/config/providerRegistry.ts | 17 +++ open-sse/handlers/chatCore.ts | 42 +++--- open-sse/handlers/responseTranslator.ts | 71 ++++++--- open-sse/services/accountFallback.ts | 13 ++ open-sse/services/backgroundTaskDetector.ts | 138 +++++++++++------ open-sse/services/sessionManager.ts | 11 ++ open-sse/services/thinkingBudget.ts | 12 +- open-sse/utils/aiSdkCompat.ts | 31 ++++ open-sse/utils/proxyFetch.ts | 17 +++ .../api-manager/ApiManagerPageClient.tsx | 71 +++++++++ .../cli-tools/components/DefaultToolCard.tsx | 2 +- .../ProviderLimits/ProviderLimitCard.tsx | 1 + .../ProviderLimits/QuotaProgressBar.tsx | 10 +- .../components/ProviderLimits/QuotaTable.tsx | 5 +- .../usage/components/ProviderLimits/index.tsx | 29 +++- .../usage/components/ProviderLimits/utils.tsx | 90 +++++------ .../guide-settings/[toolId]/route.ts | 49 +++--- src/app/api/keys/[id]/route.ts | 3 + src/app/api/providers/[id]/test/route.ts | 2 + src/app/api/providers/validate/route.ts | 2 + src/app/api/sessions/route.ts | 4 +- src/lib/providers/validation.ts | 85 ++++++++++- src/lib/usage/callLogs.ts | 6 +- src/shared/components/RequestLoggerV2.tsx | 32 +++- src/shared/components/UsageAnalytics.tsx | 17 ++- src/shared/constants/cliTools.ts | 34 +++++ src/shared/constants/modelSpecs.ts | 111 ++++++++++++++ src/shared/constants/pricing.ts | 131 ++++++++++++---- src/shared/services/cliRuntime.ts | 32 +++- src/shared/services/opencodeConfig.ts | 64 ++++++++ src/shared/utils/apiKeyPolicy.ts | 1 + src/shared/validation/schemas.ts | 3 + src/sse/handlers/chat.ts | 90 ++++++++++- src/sse/services/auth.ts | 130 +++++++++++++++- tests/unit/background-task-detector.test.mjs | 25 +++- 
tests/unit/call-logs-requested-model.test.mjs | 52 +++++++ tests/unit/fixes-p1.test.mjs | 29 +++- .../openai-to-claude-strip-empty.test.mjs | 33 +++- tests/unit/t07-no-log-key-config.test.mjs | 5 + tests/unit/t12-pricing-updates.test.mjs | 34 +++++ tests/unit/t13-stale-quota-display.test.mjs | 31 ++++ tests/unit/t14-proxy-fast-fail.test.mjs | 35 +++++ .../unit/t16-gemini-enum-type-string.test.mjs | 53 +++++++ ...t19-codex-responses-empty-content.test.mjs | 66 ++++++++ tests/unit/t20-t22-provider-headers.test.mjs | 31 ++++ .../unit/t23-t24-fallback-resilience.test.mjs | 141 ++++++++++++++++++ ...vider-validation-modelid-fallback.test.mjs | 116 ++++++++++++++ .../t26-ai-sdk-accept-header-compat.test.mjs | 30 ++++ ...27-github-copilot-response-format.test.mjs | 84 +++++++++++ tests/unit/t28-model-catalog-updates.test.mjs | 41 +++++ .../unit/t29-vertex-sa-json-executor.test.mjs | 71 +++++++++ .../t30-kiro-400-model-unavailable.test.mjs | 29 ++++ .../unit/t31-t33-t34-t38-model-specs.test.mjs | 53 +++++++ ...40-opencode-cli-tools-integration.test.mjs | 67 +++++++++ .../t42-image-size-to-aspect-ratio.test.mjs | 96 ++++++++++++ tests/unit/thinking-budget.test.mjs | 14 +- 58 files changed, 2290 insertions(+), 241 deletions(-) create mode 100644 open-sse/utils/aiSdkCompat.ts create mode 100644 src/shared/constants/modelSpecs.ts create mode 100644 src/shared/services/opencodeConfig.ts create mode 100644 tests/unit/call-logs-requested-model.test.mjs create mode 100644 tests/unit/t12-pricing-updates.test.mjs create mode 100644 tests/unit/t13-stale-quota-display.test.mjs create mode 100644 tests/unit/t14-proxy-fast-fail.test.mjs create mode 100644 tests/unit/t16-gemini-enum-type-string.test.mjs create mode 100644 tests/unit/t19-codex-responses-empty-content.test.mjs create mode 100644 tests/unit/t20-t22-provider-headers.test.mjs create mode 100644 tests/unit/t23-t24-fallback-resilience.test.mjs create mode 100644 tests/unit/t25-provider-validation-modelid-fallback.test.mjs 
create mode 100644 tests/unit/t26-ai-sdk-accept-header-compat.test.mjs create mode 100644 tests/unit/t27-github-copilot-response-format.test.mjs create mode 100644 tests/unit/t28-model-catalog-updates.test.mjs create mode 100644 tests/unit/t29-vertex-sa-json-executor.test.mjs create mode 100644 tests/unit/t30-kiro-400-model-unavailable.test.mjs create mode 100644 tests/unit/t31-t33-t34-t38-model-specs.test.mjs create mode 100644 tests/unit/t40-opencode-cli-tools-integration.test.mjs create mode 100644 tests/unit/t42-image-size-to-aspect-ratio.test.mjs diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md index 39c6f3f5..a0186e80 100644 --- a/docs/API_REFERENCE.md +++ b/docs/API_REFERENCE.md @@ -38,15 +38,20 @@ Content-Type: application/json ### Custom Headers -| Header | Direction | Description | -| ------------------------ | --------- | --------------------------------- | -| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache | -| `X-OmniRoute-Progress` | Request | Set to `true` for progress events | -| `Idempotency-Key` | Request | Dedup key (5s window) | -| `X-Request-Id` | Request | Alternative dedup key | -| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) | -| `X-OmniRoute-Idempotent` | Response | `true` if deduplicated | -| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on | +| Header | Direction | Description | +| ------------------------ | --------- | ------------------------------------------------ | +| `X-OmniRoute-No-Cache` | Request | Set to `true` to bypass cache | +| `X-OmniRoute-Progress` | Request | Set to `true` for progress events | +| `X-Session-Id` | Request | Sticky session key for external session affinity | +| `x_session_id` | Request | Underscore variant also accepted (direct HTTP) | +| `Idempotency-Key` | Request | Dedup key (5s window) | +| `X-Request-Id` | Request | Alternative dedup key | +| `X-OmniRoute-Cache` | Response | `HIT` or `MISS` (non-streaming) | +| 
`X-OmniRoute-Idempotent` | Response | `true` if deduplicated | +| `X-OmniRoute-Progress` | Response | `enabled` if progress tracking on | +| `X-OmniRoute-Session-Id` | Response | Effective session ID used by OmniRoute | + +> Nginx note: if you rely on underscore headers (for example `x_session_id`), enable `underscores_in_headers on;`. --- diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index 8ae8fe43..6b68c77d 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -578,6 +578,22 @@ Configure via **Dashboard → Settings → Routing**. | **Least Used** | Routes to the account with the oldest `lastUsedAt` timestamp, distributing traffic evenly | | **Cost Optimized** | Routes to the account with the lowest priority value, optimizing for lowest-cost providers | +#### External Sticky Session Header + +For external session affinity (for example, Claude Code/Codex agents behind reverse proxies), send: + +```http +X-Session-Id: your-session-key +``` + +OmniRoute also accepts `x_session_id` and returns the effective session key in `X-OmniRoute-Session-Id`. 
+ +If you use Nginx and send underscore-form headers, enable: + +```nginx +underscores_in_headers on; +``` + #### Wildcard Model Aliases Create wildcard patterns to remap model names: diff --git a/open-sse/config/providerRegistry.ts b/open-sse/config/providerRegistry.ts index aa4b057b..756a5881 100644 --- a/open-sse/config/providerRegistry.ts +++ b/open-sse/config/providerRegistry.ts @@ -192,6 +192,8 @@ export const REGISTRY: Record = { { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" }, { id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" }, { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" }, + { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" }, + { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" }, { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" }, { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" }, @@ -226,6 +228,8 @@ export const REGISTRY: Record = { { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" }, { id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" }, { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" }, + { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" }, + { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" }, { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" }, { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" }, @@ -782,6 +786,10 @@ export const REGISTRY: Record = { "Anthropic-Beta": "claude-code-20250219,interleaved-thinking-2025-05-14", }, models: [ + // T12/T28: MiniMax default upgraded from M2.5 to M2.7 + { id: "minimax-m2.7", name: "MiniMax M2.7" }, + { id: "MiniMax-M2.7", name: "MiniMax M2.7 (Legacy Alias)" }, + { id: "minimax-m2.7-highspeed", name: "MiniMax M2.7 Highspeed" }, { id: "minimax-m2.5", name: "MiniMax M2.5" }, { id: "MiniMax-M2.5", name: "MiniMax M2.5 (Legacy Alias)" }, { id: "MiniMax-M2.1", name: 
"MiniMax M2.1" }, @@ -803,6 +811,9 @@ export const REGISTRY: Record = { }, models: [ // Keep parity with minimax to ensure model discovery works for minimax-cn connections. + { id: "minimax-m2.7", name: "MiniMax M2.7" }, + { id: "MiniMax-M2.7", name: "MiniMax M2.7 (Legacy Alias)" }, + { id: "minimax-m2.7-highspeed", name: "MiniMax M2.7 Highspeed" }, { id: "minimax-m2.5", name: "MiniMax M2.5" }, { id: "MiniMax-M2.5", name: "MiniMax M2.5 (Legacy Alias)" }, { id: "MiniMax-M2.1", name: "MiniMax M2.1" }, @@ -1196,10 +1207,16 @@ export const REGISTRY: Record = { authType: "apikey", authHeader: "bearer", models: [ + { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview (Vertex)" }, + { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview (Vertex)" }, + { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview (Vertex)" }, { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro (Vertex)" }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash (Vertex)" }, { id: "gemini-2.0-flash-thinking-exp", name: "Gemini 2.0 Flash Thinking Exp (Vertex)" }, { id: "gemma-2-27b-it", name: "Gemma 2 27B (Vertex)" }, + { id: "deepseek-v3.2", name: "DeepSeek V3.2 (Vertex Partner)" }, + { id: "qwen3-next-80b", name: "Qwen3 Next 80B (Vertex Partner)" }, + { id: "glm-5", name: "GLM-5 (Vertex Partner)" }, { id: "claude-opus-4-5@20251101", name: "Claude Opus 4.5 (Vertex)" }, { id: "claude-sonnet-4-5@20251101", name: "Claude Sonnet 4.5 (Vertex)" }, ], diff --git a/open-sse/handlers/chatCore.ts b/open-sse/handlers/chatCore.ts index cbd10ab1..d7bcfde9 100644 --- a/open-sse/handlers/chatCore.ts +++ b/open-sse/handlers/chatCore.ts @@ -18,6 +18,7 @@ import { createErrorResult, parseUpstreamError, formatProviderError } from "../u import { HTTP_STATUS } from "../config/constants.ts"; import { classifyProviderError, PROVIDER_ERROR_TYPES } from "../services/errorClassifier.ts"; import { updateProviderConnection } from "@/lib/db/providers"; +import { logAuditEvent } from 
"@/lib/compliance"; import { handleBypassRequest } from "../utils/bypassHandler.ts"; import { saveRequestUsage, @@ -52,7 +53,7 @@ import { createProgressTransform, wantsProgress } from "../utils/progressTracker import { isModelUnavailableError, getNextFamilyFallback } from "../services/modelFamilyFallback.ts"; import { computeRequestHash, deduplicate, shouldDeduplicate } from "../services/requestDedup.ts"; import { - isBackgroundTask, + getBackgroundTaskReason, getDegradedModel, getBackgroundDegradationConfig, } from "../services/backgroundTaskDetector.ts"; @@ -61,6 +62,7 @@ import { isFallbackDecision, EMERGENCY_FALLBACK_CONFIG, } from "../services/emergencyFallback.ts"; +import { resolveStreamFlag, stripMarkdownCodeFence } from "../utils/aiSdkCompat.ts"; export function shouldUseNativeCodexPassthrough({ provider, @@ -234,17 +236,32 @@ export async function handleChatCore({ // ── Background Task Redirection (T41) ── const bgConfig = getBackgroundDegradationConfig(); - if (bgConfig.enabled && isBackgroundTask(body, clientRawRequest?.headers)) { + const backgroundReason = bgConfig.enabled + ? getBackgroundTaskReason(body, clientRawRequest?.headers) + : null; + if (backgroundReason) { const degradedModel = getDegradedModel(model); if (degradedModel !== model) { + const originalModel = model; log?.info?.( "BACKGROUND", - `Background task detected: Redirecting ${model} → ${degradedModel}` + `Background task redirect (${backgroundReason}): ${originalModel} → ${degradedModel}` ); model = degradedModel; if (body && typeof body === "object") { body.model = model; } + + logAuditEvent({ + action: "routing.background_task_redirect", + actor: apiKeyInfo?.name || "system", + target: connectionId || provider || "chat", + details: { + original_model: originalModel, + redirected_to: degradedModel, + reason: backgroundReason, + }, + }); } } @@ -269,12 +286,7 @@ export async function handleChatCore({ ? 
clientRawRequest.headers.get("accept") || clientRawRequest.headers.get("Accept") : (clientRawRequest?.headers || {})["accept"] || (clientRawRequest?.headers || {})["Accept"]; - const clientWantsJson = - typeof acceptHeader === "string" && - acceptHeader.includes("application/json") && - !acceptHeader.includes("text/event-stream"); - - const stream = body.stream === true && !clientWantsJson; + const stream = resolveStreamFlag(body?.stream, acceptHeader); // ── Phase 9.1: Semantic cache check (non-streaming, temp=0 only) ── if (isCacheable(body, clientRawRequest?.headers)) { @@ -1004,14 +1016,10 @@ export async function handleChatCore({ // T26: Strip markdown code blocks if provider format is Claude if (sourceFormat === "claude" && !stream) { - if (translatedResponse?.choices?.[0]?.message?.content) { - const text = translatedResponse.choices[0].message.content; - const codeBlockRegex = - /^```(?:json|javascript|typescript|js|ts)?\s*\n?([\s\S]*?)\n?```\s*$/; - const match = text.trim().match(codeBlockRegex); - if (match) { - translatedResponse.choices[0].message.content = match[1].trim(); - } + if (typeof translatedResponse?.choices?.[0]?.message?.content === "string") { + translatedResponse.choices[0].message.content = stripMarkdownCodeFence( + translatedResponse.choices[0].message.content + ) as string; } } diff --git a/open-sse/handlers/responseTranslator.ts b/open-sse/handlers/responseTranslator.ts index d24213ba..b4fe4472 100644 --- a/open-sse/handlers/responseTranslator.ts +++ b/open-sse/handlers/responseTranslator.ts @@ -20,6 +20,51 @@ function toNumber(value: unknown, fallback = 0): number { return Number.isFinite(parsed) ? 
parsed : fallback; } +function extractMessageOutputText(item: JsonRecord): string { + if (!Array.isArray(item.content)) return ""; + let text = ""; + for (const part of item.content) { + if (!part || typeof part !== "object") continue; + const partObj = toRecord(part); + if (partObj.type === "output_text" && typeof partObj.text === "string") { + text += partObj.text; + } + } + return text; +} + +/** + * T19: Pick the last non-empty message output text from Responses API output. + * Falls back to the last message item even when all message texts are empty. + */ +function findBestMessageText(output: unknown[]): { + text: string; + selectedMessageIndex: number; + messageItems: JsonRecord[]; +} { + const messageItems = output + .map((item) => toRecord(item)) + .filter((item) => item.type === "message" && Array.isArray(item.content)); + + for (let i = messageItems.length - 1; i >= 0; i -= 1) { + const text = extractMessageOutputText(messageItems[i]); + if (text.trim().length > 0) { + return { text, selectedMessageIndex: i, messageItems }; + } + } + + if (messageItems.length > 0) { + const lastIndex = messageItems.length - 1; + return { + text: extractMessageOutputText(messageItems[lastIndex]), + selectedMessageIndex: lastIndex, + messageItems, + }; + } + + return { text: "", selectedMessageIndex: -1, messageItems: [] }; +} + /** * Translate non-streaming response to OpenAI format * Handles different provider response formats (Gemini, Claude, etc.) @@ -44,7 +89,8 @@ export function translateNonStreamingResponse( const output = Array.isArray(response.output) ? response.output : []; const usage = toRecord(response.usage ?? 
responseRoot.usage); - let textContent = ""; + const messageSelection = findBestMessageText(output); + let textContent = messageSelection.text; let reasoningContent = ""; const toolCalls: JsonRecord[] = []; @@ -56,9 +102,7 @@ export function translateNonStreamingResponse( for (const part of itemObj.content) { if (!part || typeof part !== "object") continue; const partObj = toRecord(part); - if (partObj.type === "output_text" && typeof partObj.text === "string") { - textContent += partObj.text; - } else if (partObj.type === "summary_text" && typeof partObj.text === "string") { + if (partObj.type === "summary_text" && typeof partObj.text === "string") { reasoningContent += partObj.text; } } @@ -104,21 +148,14 @@ export function translateNonStreamingResponse( } if (process.env.DEBUG_RESPONSES_SSE_TO_JSON === "true") { - const msgItems = output.filter((i) => toRecord(i).type === "message"); - console.log(`[ResponsesSSE] ${output.length} output items, ${msgItems.length} message items`); - msgItems.forEach((item, idx) => { - const itemObj = toRecord(item); - let textLen = 0; - if (Array.isArray(itemObj.content)) { - for (const part of itemObj.content) { - const partObj = toRecord(part); - if (partObj.type === "output_text" && typeof partObj.text === "string") { - textLen += partObj.text.length; - } - } - } + console.log( + `[ResponsesSSE] ${output.length} output items, ${messageSelection.messageItems.length} message items` + ); + messageSelection.messageItems.forEach((item, idx) => { + const textLen = extractMessageOutputText(item).length; console.log(` [${idx}] text length: ${textLen}`); }); + console.log(` → Selected message index: ${messageSelection.selectedMessageIndex}`); console.log(` → Final text content length: ${textContent.length}`); } diff --git a/open-sse/services/accountFallback.ts b/open-sse/services/accountFallback.ts index 44f7ed12..066c072c 100644 --- a/open-sse/services/accountFallback.ts +++ b/open-sse/services/accountFallback.ts @@ -508,6 +508,19 @@ 
export function checkFallbackError( HTTP_STATUS.GATEWAY_TIMEOUT, ]; if (transientStatuses.includes(status)) { + const resetTime = parseResetFromHeaders(headers, errorStr); + if (resetTime) { + const waitMs = resetTime - Date.now(); + if (waitMs > 60_000) { + return { + shouldFallback: true, + cooldownMs: waitMs, + newBackoffLevel: 0, + reason: RateLimitReason.SERVER_ERROR, + }; + } + } + const profile = provider ? getProviderProfile(provider) : null; const baseCooldown = profile?.transientCooldown ?? COOLDOWN_MS.transientInitial; const maxLevel = profile?.maxBackoffLevel ?? BACKOFF_CONFIG.maxLevel; diff --git a/open-sse/services/backgroundTaskDetector.ts b/open-sse/services/backgroundTaskDetector.ts index 8d30762e..c767c4c7 100644 --- a/open-sse/services/backgroundTaskDetector.ts +++ b/open-sse/services/backgroundTaskDetector.ts @@ -47,16 +47,16 @@ const DEFAULT_DETECTION_PATTERNS = [ const DEFAULT_DEGRADATION_MAP: Record = { // Premium → Cheap alternatives - "claude-opus-4-6": "gemini-2.5-flash", - "claude-opus-4-6-thinking": "gemini-2.5-flash", - "claude-opus-4-5-20251101": "gemini-2.5-flash", - "claude-sonnet-4-5-20250929": "gemini-2.5-flash", - "claude-sonnet-4-20250514": "gemini-2.5-flash", - "claude-sonnet-4": "gemini-2.5-flash", - "gemini-3.1-pro": "gemini-3.1-flash", - "gemini-3.1-pro-high": "gemini-3.1-flash", + "claude-opus-4-6": "gemini-3-flash", + "claude-opus-4-6-thinking": "gemini-3-flash", + "claude-opus-4-5-20251101": "gemini-3-flash", + "claude-sonnet-4-5-20250929": "gemini-3-flash", + "claude-sonnet-4-20250514": "gemini-3-flash", + "claude-sonnet-4": "gemini-3-flash", + "gemini-3.1-pro": "gemini-3-flash", + "gemini-3.1-pro-high": "gemini-3-flash", "gemini-3-pro-preview": "gemini-3-flash-preview", - "gemini-2.5-pro": "gemini-2.5-flash", + "gemini-2.5-pro": "gemini-3-flash", "gpt-4o": "gpt-4o-mini", "gpt-5": "gpt-5-mini", "gpt-5.1": "gpt-5-mini", @@ -114,12 +114,93 @@ interface BackgroundMessage { interface BackgroundTaskBody { messages?: 
BackgroundMessage[]; input?: BackgroundMessage[]; + max_tokens?: unknown; + max_completion_tokens?: unknown; + max_output_tokens?: unknown; } function toMessageArray(value: unknown): BackgroundMessage[] { return Array.isArray(value) ? (value as BackgroundMessage[]) : []; } +function toFiniteNumber(value: unknown): number | null { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string" && value.trim().length > 0) { + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : null; + } + return null; +} + +function headerValue(headers: Record | null, key: string): string { + if (!headers) return ""; + const value = headers[key] ?? headers[key.toLowerCase()] ?? headers[key.toUpperCase()]; + return typeof value === "string" ? value.trim() : ""; +} + +/** + * Get reason label when request is a background/utility task. + * + * @param {object} body - Request body + * @param {object} [headers] - Request headers (optional) + * @returns {string | null} Reason label or null when not detected + */ +export function getBackgroundTaskReason( + body: BackgroundTaskBody | unknown, + headers: Record | null = null +): string | null { + if (!body || typeof body !== "object") return null; + const typedBody = body as BackgroundTaskBody; + + // 1. Check explicit header + if (headers) { + const taskType = headerValue(headers, "x-task-type"); + const priority = headerValue(headers, "x-request-priority"); + const initiator = headerValue(headers, "x-initiator"); + const explicitValue = [taskType, priority, initiator].find(Boolean); + if (explicitValue && explicitValue.toLowerCase() === "background") { + return "header_background"; + } + } + + // 2. Very low max tokens usually indicates utility/background tasks + const maxTokens = toFiniteNumber( + typedBody.max_tokens ?? typedBody.max_completion_tokens ?? 
typedBody.max_output_tokens + ); + if (maxTokens !== null && maxTokens > 0 && maxTokens < 50) { + return "low_max_tokens"; + } + + // 3. Check system prompt for background task patterns + const messages = toMessageArray(typedBody.messages ?? typedBody.input ?? []); + if (!Array.isArray(messages) || messages.length === 0) return null; + + // Find system message + const systemMsg = messages.find( + (message: BackgroundMessage) => message.role === "system" || message.role === "developer" + ); + if (!systemMsg) return null; + + const systemContent = + typeof systemMsg.content === "string" ? systemMsg.content.toLowerCase() : ""; + + if (!systemContent) return null; + + // Check against detection patterns + const matched = _config.detectionPatterns.some((pattern) => + systemContent.includes(pattern.toLowerCase()) + ); + + if (!matched) return null; + + // 4. Additional heuristic: background tasks typically have very few messages + // (system + 1-2 user messages) + const userMessages = messages.filter((message: BackgroundMessage) => message.role === "user"); + if (userMessages.length > 3) return null; // Too many turns for a background task + + return "system_prompt_pattern"; +} + /** * Check if a request is a background/utility task. * @@ -131,44 +212,7 @@ export function isBackgroundTask( body: BackgroundTaskBody | unknown, headers: Record | null = null ): boolean { - if (!body || typeof body !== "object") return false; - const typedBody = body as BackgroundTaskBody; - - // 1. Check explicit header - if (headers) { - const priority = - headers["x-request-priority"] || headers["X-Request-Priority"] || headers["x-initiator"]; - if (priority === "background" || priority === "Background") return true; - } - - // 2. Check system prompt for background task patterns - const messages = toMessageArray(typedBody.messages ?? typedBody.input ?? 
[]); - if (!Array.isArray(messages) || messages.length === 0) return false; - - // Find system message - const systemMsg = messages.find( - (message: BackgroundMessage) => message.role === "system" || message.role === "developer" - ); - if (!systemMsg) return false; - - const systemContent = - typeof systemMsg.content === "string" ? systemMsg.content.toLowerCase() : ""; - - if (!systemContent) return false; - - // Check against detection patterns - const matched = _config.detectionPatterns.some((pattern) => - systemContent.includes(pattern.toLowerCase()) - ); - - if (!matched) return false; - - // 3. Additional heuristic: background tasks typically have very few messages - // (system + 1-2 user messages) - const userMessages = messages.filter((message: BackgroundMessage) => message.role === "user"); - if (userMessages.length > 3) return false; // Too many turns for a background task - - return true; + return getBackgroundTaskReason(body, headers) !== null; } /** diff --git a/open-sse/services/sessionManager.ts b/open-sse/services/sessionManager.ts index 66ee3d09..7dd67758 100644 --- a/open-sse/services/sessionManager.ts +++ b/open-sse/services/sessionManager.ts @@ -197,6 +197,17 @@ export function getActiveSessionCountForKey(apiKeyId: string): number { return activeSessionsByKey.get(apiKeyId)?.size ?? 0; } +/** + * Snapshot of active session counts per API key. + */ +export function getAllActiveSessionCountsByKey(): Record { + const out: Record = {}; + for (const [apiKeyId, sessionIds] of activeSessionsByKey) { + out[apiKeyId] = sessionIds.size; + } + return out; +} + /** * T08: Register a session as belonging to an API key. * Call this after session creation is allowed (i.e., limit check passed). 
diff --git a/open-sse/services/thinkingBudget.ts b/open-sse/services/thinkingBudget.ts index b0b4b170..4028037c 100644 --- a/open-sse/services/thinkingBudget.ts +++ b/open-sse/services/thinkingBudget.ts @@ -29,9 +29,9 @@ export const EFFORT_BUDGETS = { // Used when clients send string-based thinking levels (e.g., VS Code Copilot) export const THINKING_LEVEL_MAP = { none: 0, - low: 1024, - medium: 10240, - high: 131072, + low: 4096, + medium: 8192, + high: 24576, max: 131072, // T11: max = full Claude budget (sub2api: xhigh) xhigh: 131072, // T11: explicit xhigh alias }; @@ -75,7 +75,8 @@ export function normalizeThinkingLevel(body) { // Handle top-level thinkingLevel or thinking_level string fields const levelStr = result.thinkingLevel || result.thinking_level; if (typeof levelStr === "string" && THINKING_LEVEL_MAP[levelStr.toLowerCase()] !== undefined) { - const budget = THINKING_LEVEL_MAP[levelStr.toLowerCase()]; + const rawBudget = THINKING_LEVEL_MAP[levelStr.toLowerCase()]; + const budget = capThinkingBudget(result.model || "", rawBudget); // Convert to Claude thinking format as canonical representation result.thinking = { type: budget > 0 ? "enabled" : "disabled", @@ -93,7 +94,8 @@ export function normalizeThinkingLevel(body) { typeof geminiLevel === "string" && THINKING_LEVEL_MAP[geminiLevel.toLowerCase()] !== undefined ) { - const budget = THINKING_LEVEL_MAP[geminiLevel.toLowerCase()]; + const rawBudget = THINKING_LEVEL_MAP[geminiLevel.toLowerCase()]; + const budget = capThinkingBudget(result.model || "", rawBudget); result.generationConfig = { ...result.generationConfig, thinkingConfig: { ...result.generationConfig.thinkingConfig, thinkingBudget: budget }, diff --git a/open-sse/utils/aiSdkCompat.ts b/open-sse/utils/aiSdkCompat.ts new file mode 100644 index 00000000..ec206f08 --- /dev/null +++ b/open-sse/utils/aiSdkCompat.ts @@ -0,0 +1,31 @@ +/** + * AI SDK compatibility helpers (T26). 
+ */ + +/** + * Detects when a client explicitly prefers JSON (non-SSE) responses. + */ +export function clientWantsJsonResponse(acceptHeader: unknown): boolean { + if (typeof acceptHeader !== "string") return false; + const normalized = acceptHeader.toLowerCase(); + return normalized.includes("application/json") && !normalized.includes("text/event-stream"); +} + +/** + * Resolves stream behavior from request body + Accept header. + * OpenAI-compatible behavior: stream only when `stream: true` and client did not force JSON. + */ +export function resolveStreamFlag(bodyStream: unknown, acceptHeader: unknown): boolean { + return bodyStream === true && !clientWantsJsonResponse(acceptHeader); +} + +/** + * Removes surrounding markdown code fences when Claude wraps JSON payloads. + * Example: ```json\n{"ok":true}\n``` -> {"ok":true} + */ +export function stripMarkdownCodeFence(text: unknown): unknown { + if (typeof text !== "string") return text; + const codeBlockRegex = /^```(?:json|javascript|typescript|js|ts)?\s*\n?([\s\S]*?)\n?```\s*$/i; + const match = text.trim().match(codeBlockRegex); + return match ? match[1].trim() : text; +} diff --git a/open-sse/utils/proxyFetch.ts b/open-sse/utils/proxyFetch.ts index ee1ed0f7..09266a34 100644 --- a/open-sse/utils/proxyFetch.ts +++ b/open-sse/utils/proxyFetch.ts @@ -6,6 +6,7 @@ import { proxyUrlForLogs, } from "./proxyDispatcher.ts"; import tlsClient from "./tlsClient.ts"; +import { isProxyReachable } from "@/lib/proxyHealth"; function isTlsFingerprintEnabled() { return process.env.ENABLE_TLS_FINGERPRINT === "true"; @@ -134,6 +135,22 @@ export async function runWithProxyContext(proxyConfig, fn) { const resolvedProxyUrl = proxyConfig ? proxyConfigToUrl(proxyConfig) : null; + // T14: Proxy Fast-Fail + // Perform a short TCP reachability check before issuing upstream requests. 
+ if (resolvedProxyUrl) { + const reachable = await isProxyReachable(resolvedProxyUrl); + if (!reachable) { + const proxyLabel = proxyUrlForLogs(resolvedProxyUrl); + const err = new Error(`[Proxy Fast-Fail] Proxy unreachable: ${proxyLabel}`) as Error & { + code?: string; + statusCode?: number; + }; + err.code = "PROXY_UNREACHABLE"; + err.statusCode = 503; + throw err; + } + } + return proxyContext.run(proxyConfig || null, async () => { if (resolvedProxyUrl) { console.log( diff --git a/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx b/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx index 75e59468..2155a01e 100644 --- a/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx +++ b/src/app/(dashboard)/dashboard/api-manager/ApiManagerPageClient.tsx @@ -69,6 +69,7 @@ interface ApiKey { noLog?: boolean; autoResolve?: boolean; isActive?: boolean; + maxSessions?: number; accessSchedule?: AccessSchedule | null; createdAt: string; } @@ -109,6 +110,7 @@ export default function ApiManagerPageClient() { const [error, setError] = useState(null); const [isSubmitting, setIsSubmitting] = useState(false); const [usageStats, setUsageStats] = useState>({}); + const [sessionCounts, setSessionCounts] = useState>({}); const { copied, copy } = useCopyToClipboard(); @@ -150,6 +152,7 @@ export default function ApiManagerPageClient() { setKeys(data.keys || []); // Fetch usage stats after keys are loaded fetchUsageStats(data.keys || []); + fetchSessionCounts(data.keys || []); } } catch (error) { console.log("Error fetching keys:", error); @@ -187,6 +190,31 @@ export default function ApiManagerPageClient() { } }; + const fetchSessionCounts = async (apiKeys: ApiKey[]) => { + if (apiKeys.length === 0) { + setSessionCounts({}); + return; + } + try { + const res = await fetch("/api/sessions"); + if (!res.ok) return; + const data = await res.json(); + const byApiKeyRaw = + data && typeof data.byApiKey === "object" && !Array.isArray(data.byApiKey) + ? 
data.byApiKey + : {}; + const normalized: Record = {}; + for (const key of apiKeys) { + const value = byApiKeyRaw[key.id]; + normalized[key.id] = + typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0; + } + setSessionCounts(normalized); + } catch (error) { + console.log("Error fetching session counts:", error); + } + }; + const clearError = useCallback(() => setError(null), []); const handleCreateKey = async () => { @@ -266,6 +294,7 @@ export default function ApiManagerPageClient() { allowedConnections: string[], autoResolve: boolean, isActive: boolean, + maxSessions: number, accessSchedule: AccessSchedule | null ) => { if (!editingKey || !editingKey.id) return; @@ -291,6 +320,10 @@ export default function ApiManagerPageClient() { const validConnections = allowedConnections.filter( (id) => typeof id === "string" && /^[0-9a-f-]{36}$/i.test(id) ); + const normalizedMaxSessions = + typeof maxSessions === "number" && Number.isFinite(maxSessions) + ? Math.max(0, Math.floor(maxSessions)) + : 0; setIsSubmitting(true); clearError(); @@ -305,6 +338,7 @@ export default function ApiManagerPageClient() { noLog, autoResolve, isActive, + maxSessions: normalizedMaxSessions, accessSchedule, }), }); @@ -505,6 +539,9 @@ export default function ApiManagerPageClient() { Array.isArray(key.allowedConnections) && key.allowedConnections.length > 0; const noLogEnabled = key.noLog === true; const keyIsActive = key.isActive !== false; // default true + const maxSessions = typeof key.maxSessions === "number" ? key.maxSessions : 0; + const hasSessionLimit = maxSessions > 0; + const activeSessions = sessionCounts[key.id] || 0; const hasSchedule = key.accessSchedule?.enabled === true; return (
)} + {hasSessionLimit && ( + + group + Sessions: {activeSessions}/{maxSessions} + + )} {!keyIsActive && ( block @@ -778,6 +821,7 @@ const PermissionsModal = memo(function PermissionsModal({ connections: string[], autoResolve: boolean, isActive: boolean, + maxSessions: number, accessSchedule: AccessSchedule | null ) => void; }) { @@ -794,6 +838,9 @@ const PermissionsModal = memo(function PermissionsModal({ const [noLogEnabled, setNoLogEnabled] = useState(apiKey?.noLog === true); const [autoResolveEnabled, setAutoResolveEnabled] = useState(apiKey?.autoResolve === true); const [keyIsActive, setKeyIsActive] = useState(apiKey?.isActive !== false); + const [maxSessions, setMaxSessions] = useState( + typeof apiKey?.maxSessions === "number" && apiKey.maxSessions > 0 ? apiKey.maxSessions : 0 + ); const [scheduleEnabled, setScheduleEnabled] = useState(apiKey?.accessSchedule?.enabled === true); const [scheduleFrom, setScheduleFrom] = useState(apiKey?.accessSchedule?.from ?? "08:00"); const [scheduleUntil, setScheduleUntil] = useState(apiKey?.accessSchedule?.until ?? "18:00"); @@ -905,6 +952,7 @@ const PermissionsModal = memo(function PermissionsModal({ allowAllConnections ? [] : selectedConnections, autoResolveEnabled, keyIsActive, + maxSessions, schedule ); }, [ @@ -916,6 +964,7 @@ const PermissionsModal = memo(function PermissionsModal({ selectedConnections, autoResolveEnabled, keyIsActive, + maxSessions, scheduleEnabled, scheduleFrom, scheduleUntil, @@ -1007,6 +1056,28 @@ const PermissionsModal = memo(function PermissionsModal({
+ {/* Max Sessions Limit (T08) */} +
+
+

Max Active Sessions

+

+ 0 = unlimited. Return 429 when this key exceeds concurrent sticky sessions. +

+
+
+ { + const parsed = Number.parseInt(e.target.value || "0", 10); + setMaxSessions(Number.isFinite(parsed) && parsed > 0 ? parsed : 0); + }} + /> +
+
+ {/* Access Schedule */}
diff --git a/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx b/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx index bafd4852..7af7eebf 100644 --- a/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx +++ b/src/app/(dashboard)/dashboard/cli-tools/components/DefaultToolCard.tsx @@ -153,7 +153,7 @@ export default function DefaultToolCard({ }; // Check if this tool supports direct config file write - const supportsDirectSave = ["continue"].includes(toolId); + const supportsDirectSave = ["continue", "opencode"].includes(toolId); const renderApiKeySelector = () => { return ( diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx index 4ea0c27c..f41acf90 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/ProviderLimitCard.tsx @@ -165,6 +165,7 @@ export default function ProviderLimitCard({ percentage={percentage} unlimited={unlimited} resetTime={quota.resetAt} + staleAfterReset={quota.staleAfterReset === true} /> ); })} diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx index 8885f9e2..51ff62bc 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaProgressBar.tsx @@ -71,6 +71,7 @@ export default function QuotaProgressBar({ total = 0, unlimited = false, resetTime = null, + staleAfterReset = false, }) { const colors = getColorClasses(percentage); const countdown = formatResetTime(resetTime); @@ -105,12 +106,17 @@ export default function QuotaProgressBar({ {used.toLocaleString()} / {total.toLocaleString()} requests - {countdown !== 
"-" && ( + {staleAfterReset ? ( +
+ + Refreshing... +
+ ) : countdown !== "-" ? (
Reset in {countdown}
- )} + ) : null}
{/* Reset time display */} diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx index eab32f59..47e8dcf7 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/QuotaTable.tsx @@ -92,6 +92,7 @@ export default function QuotaTable({ quotas = [] }) { quota.remainingPercentage !== undefined ? Math.round(quota.remainingPercentage) : calculatePercentage(quota.used, quota.total); + const staleAfterReset = quota.staleAfterReset === true; const colors = getColorClasses(remaining); const countdown = formatResetTime(quota.resetAt); @@ -140,7 +141,9 @@ export default function QuotaTable({ quotas = [] }) { {/* Reset Time */} - {countdown !== t("notAvailableSymbol") || resetDisplay ? ( + {staleAfterReset ? ( +
⟳ Refreshing...
+ ) : countdown !== t("notAvailableSymbol") || resetDisplay ? (
{countdown !== t("notAvailableSymbol") && (
diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx index 56421b55..6f535e13 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/index.tsx @@ -122,6 +122,7 @@ export default function ProviderLimits() { const intervalRef = useRef(null); const countdownRef = useRef(null); const lastFetchTimeRef = useRef({}); + const staleProbeRef = useRef({}); const fetchConnections = useCallback(async () => { try { @@ -137,11 +138,12 @@ export default function ProviderLimits() { } }, []); - const fetchQuota = useCallback(async (connectionId, provider) => { + const fetchQuota = useCallback(async (connectionId, provider, options = {}) => { + const force = options?.force === true; // Debounce: skip if last fetch was < MIN_FETCH_INTERVAL_MS ago const now = Date.now(); const lastFetch = lastFetchTimeRef.current[connectionId] || 0; - if (now - lastFetch < MIN_FETCH_INTERVAL_MS) { + if (!force && now - lastFetch < MIN_FETCH_INTERVAL_MS) { return; // Skip, data is still fresh } lastFetchTimeRef.current[connectionId] = now; @@ -165,6 +167,20 @@ export default function ProviderLimits() { } const data = await response.json(); const parsedQuotas = parseQuotaData(provider, data); + + // T13: If resetAt already passed but provider still returned stale cumulative usage, + // display 0 immediately and trigger a background probe to refresh snapshot. 
+ const hasStaleAfterReset = parsedQuotas.some((q) => q?.staleAfterReset === true); + if (hasStaleAfterReset) { + const lastProbeAt = staleProbeRef.current[connectionId] || 0; + if (Date.now() - lastProbeAt >= MIN_FETCH_INTERVAL_MS) { + staleProbeRef.current[connectionId] = Date.now(); + setTimeout(() => { + fetchQuota(connectionId, provider, { force: true }).catch(() => {}); + }, 5000); + } + } + setQuotaData((prev) => ({ ...prev, [connectionId]: { @@ -571,6 +587,7 @@ export default function ProviderLimits() { const colors = getBarColor(remaining); const cd = formatCountdown(q.resetAt); const shortName = getShortModelName(q.name); + const staleAfterReset = q.staleAfterReset === true; return (
@@ -583,11 +600,15 @@ export default function ProviderLimits() { {/* Countdown */} - {cd && ( + {staleAfterReset ? ( + + ⟳ Refreshing... + + ) : cd ? ( ⏱ {cd} - )} + ) : null} {/* Progress bar */}
diff --git a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx index 7adf8c4d..a7fcd114 100644 --- a/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx +++ b/src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx @@ -76,6 +76,40 @@ export function calculatePercentage(used, total) { return Math.round(((total - used) / total) * 100); } +function isPastResetWindow(resetAt) { + if (!resetAt) return false; + const resetTime = + typeof resetAt === "number" ? resetAt : typeof resetAt === "string" ? Date.parse(resetAt) : NaN; + if (!Number.isFinite(resetTime)) return false; + return Date.now() >= resetTime; +} + +function normalizeQuotaEntry(name, quota = {}, extras = {}) { + const usedRaw = Number(quota?.used || 0); + const totalRaw = Number(quota?.total || 0); + const resetAt = quota?.resetAt || null; + const staleAfterReset = isPastResetWindow(resetAt); + const used = staleAfterReset ? 0 : usedRaw; + const total = Number.isFinite(totalRaw) ? totalRaw : 0; + const remainingPercentageRaw = safePercentage(quota?.remainingPercentage); + const remainingPercentage = + staleAfterReset && total > 0 + ? 100 + : remainingPercentageRaw !== undefined + ? remainingPercentageRaw + : undefined; + + return { + name, + used: Number.isFinite(used) ? used : 0, + total, + resetAt, + staleAfterReset, + ...(remainingPercentage !== undefined ? 
{ remainingPercentage } : {}), + ...extras, + }; +} + /** * Parse provider-specific quota structures into normalized array * @param {string} provider - Provider name (github, antigravity, codex, kiro, claude) @@ -95,13 +129,7 @@ export function parseQuotaData(provider, data) { if (quota?.unlimited && (!quota?.total || quota.total <= 0)) { return; } - normalizedQuotas.push({ - name, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - remainingPercentage: safePercentage(quota.remainingPercentage), - }); + normalizedQuotas.push(normalizeQuotaEntry(name, quota)); }); } break; @@ -109,14 +137,11 @@ export function parseQuotaData(provider, data) { case "antigravity": if (data.quotas) { Object.entries(data.quotas).forEach(([modelKey, quota]: [string, any]) => { - normalizedQuotas.push({ - name: quota.displayName || modelKey, - modelKey: modelKey, // Keep modelKey for sorting - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - remainingPercentage: safePercentage(quota.remainingPercentage), - }); + normalizedQuotas.push( + normalizeQuotaEntry(quota.displayName || modelKey, quota, { + modelKey: modelKey, // Keep modelKey for sorting + }) + ); }); } break; @@ -124,12 +149,7 @@ export function parseQuotaData(provider, data) { case "codex": if (data.quotas) { Object.entries(data.quotas).forEach(([quotaType, quota]: [string, any]) => { - normalizedQuotas.push({ - name: quotaType, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - }); + normalizedQuotas.push(normalizeQuotaEntry(quotaType, quota)); }); } break; @@ -137,12 +157,7 @@ export function parseQuotaData(provider, data) { case "kiro": if (data.quotas) { Object.entries(data.quotas).forEach(([quotaType, quota]: [string, any]) => { - normalizedQuotas.push({ - name: quotaType, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - }); + 
normalizedQuotas.push(normalizeQuotaEntry(quotaType, quota)); }); } break; @@ -159,13 +174,7 @@ export function parseQuotaData(provider, data) { }); } else if (data.quotas) { Object.entries(data.quotas).forEach(([name, quota]: [string, any]) => { - normalizedQuotas.push({ - name, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - remainingPercentage: safePercentage(quota.remainingPercentage), - }); + normalizedQuotas.push(normalizeQuotaEntry(name, quota)); }); } break; @@ -174,12 +183,7 @@ export function parseQuotaData(provider, data) { // Generic fallback for unknown providers if (data.quotas) { Object.entries(data.quotas).forEach(([name, quota]: [string, any]) => { - normalizedQuotas.push({ - name, - used: quota.used || 0, - total: quota.total || 0, - resetAt: quota.resetAt || null, - }); + normalizedQuotas.push(normalizeQuotaEntry(name, quota)); }); } } @@ -218,11 +222,7 @@ export function normalizePlanTier(plan) { const upper = raw.toUpperCase(); - if ( - upper.includes("PRO+") || - upper.includes("PRO PLUS") || - upper.includes("PROPLUS") - ) { + if (upper.includes("PRO+") || upper.includes("PRO PLUS") || upper.includes("PROPLUS")) { return { key: "plus", label: "Pro+", variant: "secondary", rank: 4, raw }; } diff --git a/src/app/api/cli-tools/guide-settings/[toolId]/route.ts b/src/app/api/cli-tools/guide-settings/[toolId]/route.ts index 2f0fd1fc..2d5e4e7e 100644 --- a/src/app/api/cli-tools/guide-settings/[toolId]/route.ts +++ b/src/app/api/cli-tools/guide-settings/[toolId]/route.ts @@ -3,6 +3,8 @@ import fs from "fs/promises"; import path from "path"; import os from "os"; import { getRuntimePorts } from "@/lib/runtime/ports"; +import { getOpenCodeConfigPath } from "@/shared/services/cliRuntime"; +import { mergeOpenCodeConfig } from "@/shared/services/opencodeConfig"; import { guideSettingsSaveSchema } from "@/shared/validation/schemas"; import { isValidationFailure, validateBody } from "@/shared/validation/helpers"; @@ 
-10,7 +12,7 @@ import { isValidationFailure, validateBody } from "@/shared/validation/helpers"; * POST /api/cli-tools/guide-settings/:toolId * * Save configuration for guide-based tools that have config files. - * Currently supports: continue + * Currently supports: continue, opencode */ export async function POST(request, { params }) { let rawBody; @@ -131,50 +133,39 @@ async function saveContinueConfig({ baseUrl, apiKey, model }) { } /** - * Save OpenCode config to ~/.config/opencode/config.toml (XDG_CONFIG_HOME aware). + * Save OpenCode config to: + * - Linux/macOS: ~/.config/opencode/opencode.json (XDG_CONFIG_HOME aware) + * - Windows: %APPDATA%/opencode/opencode.json + * * (#524) OpenCode was silently failing because this handler was missing. */ async function saveOpenCodeConfig({ baseUrl, apiKey, model }) { - const { apiPort } = getRuntimePorts(); - // Honour $XDG_CONFIG_HOME if set, otherwise use ~/.config per the XDG Base Directory spec - const xdgConfigHome = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".config"); - const configPath = path.join(xdgConfigHome, "opencode", "config.toml"); + const configPath = getOpenCodeConfigPath(); const configDir = path.dirname(configPath); - // Ensure ~/.config/opencode/ exists + // Ensure config directory exists await fs.mkdir(configDir, { recursive: true }); const normalizedBaseUrl = String(baseUrl || "") .trim() .replace(/\/+$/, ""); - // Read existing TOML to preserve any user settings outside our block - let existingContent = ""; + // Read existing JSON to preserve other provider entries + let existingConfig: Record = {}; try { - existingContent = await fs.readFile(configPath, "utf-8"); + const raw = await fs.readFile(configPath, "utf-8"); + existingConfig = JSON.parse(raw); } catch { - // File doesn't exist yet — start fresh + // File doesn't exist or invalid JSON — start fresh } - // Build the OmniRoute TOML block. - // opencode config.toml uses the [provider.X] table format. 
- void apiPort; // available for future port-based detection - const omniBlock = ` -# OmniRoute managed — updated automatically by OmniRoute CLI Tools -[provider.omniroute] -api_key = "${apiKey || "sk_omniroute"}" -base_url = "${normalizedBaseUrl}" -model = "${model}" -`; + const nextConfig = mergeOpenCodeConfig(existingConfig, { + baseUrl: normalizedBaseUrl, + apiKey, + model, + }); - // Remove old OmniRoute-managed block (if any) then append fresh one - const cleanedContent = existingContent - .replace(/\n?# OmniRoute managed[\s\S]*?(?=\n\[|$)/, "") - .trimEnd(); - - const newContent = (cleanedContent ? cleanedContent + "\n" : "") + omniBlock; - - await fs.writeFile(configPath, newContent, "utf-8"); + await fs.writeFile(configPath, JSON.stringify(nextConfig, null, 2), "utf-8"); return NextResponse.json({ success: true, diff --git a/src/app/api/keys/[id]/route.ts b/src/app/api/keys/[id]/route.ts index 499a9f72..e3f3b0b7 100644 --- a/src/app/api/keys/[id]/route.ts +++ b/src/app/api/keys/[id]/route.ts @@ -62,6 +62,7 @@ export async function PATCH(request, { params }) { noLog, autoResolve, isActive, + maxSessions, accessSchedule, } = validation.data; @@ -72,6 +73,7 @@ export async function PATCH(request, { params }) { if (noLog !== undefined) payload.noLog = noLog; if (autoResolve !== undefined) payload.autoResolve = autoResolve; if (isActive !== undefined) payload.isActive = isActive; + if (maxSessions !== undefined) payload.maxSessions = maxSessions; if (accessSchedule !== undefined) payload.accessSchedule = accessSchedule; const updated = await updateApiKeyPermissions(id, payload); @@ -90,6 +92,7 @@ export async function PATCH(request, { params }) { ...(noLog !== undefined && { noLog }), ...(autoResolve !== undefined && { autoResolve }), ...(isActive !== undefined && { isActive }), + ...(maxSessions !== undefined && { maxSessions }), ...(accessSchedule !== undefined && { accessSchedule }), }); } catch (error) { diff --git a/src/app/api/providers/[id]/test/route.ts 
b/src/app/api/providers/[id]/test/route.ts index ece35e28..0f04052e 100644 --- a/src/app/api/providers/[id]/test/route.ts +++ b/src/app/api/providers/[id]/test/route.ts @@ -516,6 +516,7 @@ async function testApiKeyConnection(connection: any) { return { valid: !!result.valid, error, + warning: result.warning || null, diagnosis, }; } @@ -667,6 +668,7 @@ export async function testSingleConnection(connectionId: string, validationModel return { valid: result.valid, error: result.error, + warning: result.warning || null, refreshed: result.refreshed || false, diagnosis, latencyMs, diff --git a/src/app/api/providers/validate/route.ts b/src/app/api/providers/validate/route.ts index 61d82a2f..60917878 100644 --- a/src/app/api/providers/validate/route.ts +++ b/src/app/api/providers/validate/route.ts @@ -63,6 +63,8 @@ export async function POST(request) { return NextResponse.json({ valid: !!result.valid, error: result.valid ? null : result.error || "Invalid API key", + warning: result.warning || null, + method: result.method || null, }); } catch (error) { console.log("Error validating API key:", error); diff --git a/src/app/api/sessions/route.ts b/src/app/api/sessions/route.ts index 16dff15d..cea6a9c6 100644 --- a/src/app/api/sessions/route.ts +++ b/src/app/api/sessions/route.ts @@ -2,13 +2,15 @@ import { NextResponse } from "next/server"; import { getActiveSessions, getActiveSessionCount, + getAllActiveSessionCountsByKey, } from "@omniroute/open-sse/services/sessionManager.ts"; export async function GET() { try { const sessions = getActiveSessions(); const count = getActiveSessionCount(); - return NextResponse.json({ count, sessions }); + const byApiKey = getAllActiveSessionCountsByKey(); + return NextResponse.json({ count, sessions, byApiKey }); } catch (error) { return NextResponse.json({ error: error.message }, { status: 500 }); } diff --git a/src/lib/providers/validation.ts b/src/lib/providers/validation.ts index ae71e4e8..f5ca56e1 100644 --- 
a/src/lib/providers/validation.ts +++ b/src/lib/providers/validation.ts @@ -99,8 +99,10 @@ async function validateOpenAILikeProvider({ return { valid: false, error: `Validation failed: ${modelsRes.status}` }; } + const testModelId = (providerSpecificData as any)?.validationModelId || modelId; + const testBody = { - model: modelId, + model: testModelId, messages: [{ role: "user", content: "test" }], max_tokens: 1, }; @@ -131,7 +133,13 @@ async function validateOpenAILikeProvider({ return { valid: true, error: null }; } -async function validateAnthropicLikeProvider({ apiKey, baseUrl, modelId, headers = {} }: any) { +async function validateAnthropicLikeProvider({ + apiKey, + baseUrl, + modelId, + headers = {}, + providerSpecificData = {}, +}: any) { if (!baseUrl) { return { valid: false, error: "Missing base URL" }; } @@ -149,11 +157,14 @@ async function validateAnthropicLikeProvider({ apiKey, baseUrl, modelId, headers requestHeaders["anthropic-version"] = "2023-06-01"; } + const testModelId = + providerSpecificData?.validationModelId || modelId || "claude-3-5-sonnet-20241022"; + const response = await fetch(baseUrl, { method: "POST", headers: requestHeaders, body: JSON.stringify({ - model: modelId || "claude-3-5-sonnet-20241022", + model: testModelId, max_tokens: 1, messages: [{ role: "user", content: "test" }], }), @@ -352,52 +363,104 @@ async function validateOpenAICompatibleProvider({ apiKey, providerSpecificData = return { valid: false, error: "No base URL configured for OpenAI compatible provider" }; } + const validationModelId = + typeof providerSpecificData?.validationModelId === "string" + ? 
providerSpecificData.validationModelId.trim() + : ""; + // Step 1: Try GET /models + let modelsReachable = false; try { const modelsRes = await fetch(`${baseUrl}/models`, { method: "GET", headers: buildBearerHeaders(apiKey), }); + modelsReachable = true; + if (modelsRes.ok) { - return { valid: true, error: null }; + return { valid: true, error: null, method: "models_endpoint" }; } if (modelsRes.status === 401 || modelsRes.status === 403) { return { valid: false, error: "Invalid API key" }; } + + // Endpoint responded and auth seems valid, but quota is exhausted/rate-limited. + if (modelsRes.status === 429) { + return { + valid: true, + error: null, + method: "models_endpoint", + warning: "Rate limited, but credentials are valid", + }; + } } catch { // /models fetch failed (network error, etc.) — fall through to chat test } + // T25: if /models cannot be used and no custom model was provided, return a + // clear actionable message instead of a generic connection error. + if (!validationModelId) { + return { + valid: false, + error: "Endpoint /models unavailable. Provide a Model ID to validate via /chat/completions.", + }; + } + // Step 2: Fallback — try a minimal chat completion request // Many providers don't expose /models but accept chat completions fine const apiType = providerSpecificData.apiType || "chat"; const chatSuffix = apiType === "responses" ? 
"/responses" : "/chat/completions"; const chatUrl = `${baseUrl}${chatSuffix}`; + const testModelId = validationModelId; try { const chatRes = await fetch(chatUrl, { method: "POST", headers: buildBearerHeaders(apiKey), body: JSON.stringify({ - model: "gpt-4o-mini", + model: testModelId, messages: [{ role: "user", content: "test" }], max_tokens: 1, }), }); if (chatRes.ok) { - return { valid: true, error: null }; + return { valid: true, error: null, method: "chat_completions" }; } if (chatRes.status === 401 || chatRes.status === 403) { return { valid: false, error: "Invalid API key" }; } + if (chatRes.status === 429) { + return { + valid: true, + error: null, + method: "chat_completions", + warning: "Rate limited, but credentials are valid", + }; + } + + // If /models was reachable but returned non-auth error, and chat succeeds + // auth-wise, this still confirms credentials are valid. + if (chatRes.status === 400) { + return { + valid: true, + error: null, + method: "inference_available", + warning: "Model ID may be invalid, but credentials are valid", + }; + } + // 4xx other than auth (e.g. 400 bad model, 422) usually means auth passed if (chatRes.status >= 400 && chatRes.status < 500) { - return { valid: true, error: null }; + return { + valid: true, + error: null, + method: "inference_available", + }; } if (chatRes.status >= 500) { @@ -410,6 +473,10 @@ async function validateOpenAICompatibleProvider({ apiKey, providerSpecificData = // Step 3: Final fallback — simple connectivity check // For local providers (Ollama, LM Studio, etc.) 
that may not respond to // standard OpenAI endpoints but are still reachable + if (!modelsReachable) { + return { valid: false, error: "Connection failed while testing /chat/completions" }; + } + try { const pingRes = await fetch(baseUrl, { method: "GET", @@ -464,12 +531,13 @@ async function validateAnthropicCompatibleProvider({ apiKey, providerSpecificDat } // Step 2: Fallback — try a minimal messages request + const testModelId = providerSpecificData?.validationModelId || "claude-3-5-sonnet-20241022"; try { const messagesRes = await fetch(`${baseUrl}/messages`, { method: "POST", headers, body: JSON.stringify({ - model: "claude-3-5-sonnet-20241022", + model: testModelId, max_tokens: 1, messages: [{ role: "user", content: "test" }], }), @@ -646,6 +714,7 @@ export async function validateProviderApiKey({ provider, apiKey, providerSpecifi baseUrl: requestBaseUrl, modelId, headers: requestHeaders, + providerSpecificData, }); } diff --git a/src/lib/usage/callLogs.ts b/src/lib/usage/callLogs.ts index 341dc753..2a18f3c7 100644 --- a/src/lib/usage/callLogs.ts +++ b/src/lib/usage/callLogs.ts @@ -330,7 +330,7 @@ export async function getCallLogs(filter: any = {}) { } if (filter.model) { - conditions.push("model LIKE @modelQ"); + conditions.push("(model LIKE @modelQ OR requested_model LIKE @modelQ)"); params.modelQ = `%${filter.model}%`; } if (filter.provider) { @@ -351,7 +351,8 @@ export async function getCallLogs(filter: any = {}) { if (filter.search) { conditions.push(`( model LIKE @searchQ OR path LIKE @searchQ OR account LIKE @searchQ OR - provider LIKE @searchQ OR api_key_name LIKE @searchQ OR api_key_id LIKE @searchQ OR + requested_model LIKE @searchQ OR provider LIKE @searchQ OR + api_key_name LIKE @searchQ OR api_key_id LIKE @searchQ OR combo_name LIKE @searchQ OR CAST(status AS TEXT) LIKE @searchQ )`); params.searchQ = `%${filter.search}%`; @@ -408,6 +409,7 @@ export async function getCallLogById(id: string) { path: toStringOrNull(entryRow.path), status: 
toNumber(entryRow.status), model: toStringOrNull(entryRow.model), + requestedModel: toStringOrNull(entryRow.requested_model), provider: toStringOrNull(entryRow.provider), account: toStringOrNull(entryRow.account), connectionId: toStringOrNull(entryRow.connection_id), diff --git a/src/shared/components/RequestLoggerV2.tsx b/src/shared/components/RequestLoggerV2.tsx index 91ed2d19..cd34ebf4 100644 --- a/src/shared/components/RequestLoggerV2.tsx +++ b/src/shared/components/RequestLoggerV2.tsx @@ -29,6 +29,7 @@ const STATUS_FILTERS = [ const COLUMNS = [ { key: "status", label: "Status" }, { key: "model", label: "Model" }, + { key: "requestedModel", label: "Requested" }, { key: "provider", label: "Provider" }, { key: "protocol", label: "Protocol" }, { key: "account", label: "Account" }, @@ -234,7 +235,9 @@ export default function RequestLoggerV2() { // Unique accounts and providers for dropdowns const uniqueAccounts = [...new Set(logs.map((l) => l.account).filter((a) => a && a !== "-"))]; - const uniqueModels = [...new Set(logs.map((l) => l.model).filter(Boolean))].sort(); + const uniqueModels = [ + ...new Set(logs.flatMap((l) => [l.model, l.requestedModel]).filter((value) => Boolean(value))), + ].sort(); const uniqueProviders = [ ...new Set(logs.map((l) => l.provider).filter((p) => p && p !== "-")), ].sort(); @@ -514,6 +517,11 @@ export default function RequestLoggerV2() { Model )} + {visibleColumns.requestedModel && ( + + Requested + + )} {visibleColumns.provider && ( Provider @@ -596,6 +604,28 @@ export default function RequestLoggerV2() { {log.model} )} + {visibleColumns.requestedModel && ( + + {log.requestedModel ? ( + + {log.requestedModel} + + ) : ( + + )} + + )} {visibleColumns.provider && ( {/* Summary Cards — Row 1: Core metrics */} -
+
+
{/* Summary Cards — Row 2: Derived insights */} -
+
+
{/* Activity Heatmap + Weekly Widgets */} diff --git a/src/shared/constants/cliTools.ts b/src/shared/constants/cliTools.ts index 026b5692..aa66c95b 100644 --- a/src/shared/constants/cliTools.ts +++ b/src/shared/constants/cliTools.ts @@ -180,12 +180,46 @@ export const CLI_TOOLS = { color: "#FF6B35", description: "OpenCode AI coding agent (Terminal)", configType: "guide", + notes: [ + { + type: "warning", + text: "Config path: Linux/macOS ~/.config/opencode/opencode.json • Windows %APPDATA%\\\\opencode\\\\opencode.json", + }, + { + type: "warning", + text: 'Thinking variant example: opencode run "implement this feature" --model omniroute/claude-sonnet-4-5-thinking --variant high', + }, + ], guideSteps: [ { step: 1, title: "Install OpenCode", desc: "Install via npm: npm install -g opencode-ai" }, { step: 2, title: "API Key", type: "apiKeySelector" }, { step: 3, title: "Set Base URL", desc: "opencode config set baseUrl {{baseUrl}}" }, { step: 4, title: "Select Model", type: "modelSelector" }, + { + step: 5, + title: "Use Thinking Variant", + desc: "For thinking models, run with --variant high/low/max (example command below).", + }, ], + codeBlock: { + language: "json", + code: `{ + "providers": { + "omniroute": { + "name": "OmniRoute", + "api": "openai", + "baseURL": "{{baseUrl}}", + "apiKey": "{{apiKey}}", + "models": [ + "{{model}}", + "claude-sonnet-4-5-thinking", + "gemini-3.1-pro-high", + "gemini-3-flash" + ] + } + } +}`, + }, }, kiro: { id: "kiro", diff --git a/src/shared/constants/modelSpecs.ts b/src/shared/constants/modelSpecs.ts new file mode 100644 index 00000000..ede662c7 --- /dev/null +++ b/src/shared/constants/modelSpecs.ts @@ -0,0 +1,111 @@ +/** + * Centralized specifications for AI Models. + * Contains maximum token caps and thinking budgets to prevent API errors + * when clients request more than the model supports. 
+ */ + +export interface ModelSpec { + maxOutputTokens: number; + contextWindow?: number; + defaultThinkingBudget?: number; + thinkingBudgetCap?: number; + thinkingOverhead?: number; // buffer de tokens para thinking + adaptiveMaxTokens?: number; // tokens disponíveis para output quando thinking ativo + aliases?: string[]; // IDs alternativos para este modelo + supportsThinking?: boolean; + supportsTools?: boolean; + supportsVision?: boolean; +} + +export const MODEL_SPECS: Record = { + // ── Gemini 3 Flash series ─────────────────────────────────────── + "gemini-3-flash": { + maxOutputTokens: 65536, + contextWindow: 1048576, + defaultThinkingBudget: 0, + thinkingBudgetCap: 0, + supportsThinking: false, + supportsTools: true, + supportsVision: true, + aliases: ["gemini-3-flash-preview", "gemini-3.1-flash-lite-preview"], + }, + + // ── Gemini 3.1 Pro High ───────────────────────────────────────── + "gemini-3.1-pro-high": { + maxOutputTokens: 131072, + contextWindow: 1048576, + defaultThinkingBudget: 24576, + thinkingBudgetCap: 32768, + thinkingOverhead: 1000, + supportsThinking: true, + supportsTools: true, + supportsVision: true, + aliases: ["gemini-3-pro-high"], + }, + + // ── Gemini 3.1 Pro Low ────────────────────────────────────────── + "gemini-3.1-pro-low": { + maxOutputTokens: 131072, + contextWindow: 1048576, + defaultThinkingBudget: 8192, + thinkingBudgetCap: 16000, + supportsThinking: true, + supportsTools: true, + supportsVision: true, + aliases: ["gemini-3-pro-low"], + }, + + // ── Claude Opus 4.5 ───────────────────────────────────────────── + "claude-opus-4-5": { + maxOutputTokens: 32768, + contextWindow: 200000, + defaultThinkingBudget: 10000, + thinkingBudgetCap: 32000, + supportsThinking: true, + supportsTools: true, + supportsVision: true, + }, + + // Defaults + __default__: { + maxOutputTokens: 8192, + }, +}; + +export function getModelSpec(modelId: string): ModelSpec | undefined { + if (MODEL_SPECS[modelId]) return MODEL_SPECS[modelId]; + + // 
Buscas por alias + for (const [canonical, spec] of Object.entries(MODEL_SPECS)) { + if (spec.aliases?.includes(modelId)) return spec; + } + + // Prefix matching + for (const [key, spec] of Object.entries(MODEL_SPECS)) { + if (key !== "__default__" && modelId.startsWith(key)) return spec; + } + + return undefined; +} + +export function capMaxOutputTokens(modelId: string, requested?: number): number { + const spec = getModelSpec(modelId); + const cap = spec?.maxOutputTokens ?? MODEL_SPECS.__default__.maxOutputTokens; + return requested ? Math.min(requested, cap) : cap; +} + +export function getDefaultThinkingBudget(modelId: string): number { + return getModelSpec(modelId)?.defaultThinkingBudget ?? 0; +} + +export function capThinkingBudget(modelId: string, budget: number): number { + const cap = getModelSpec(modelId)?.thinkingBudgetCap ?? budget; + return Math.min(budget, cap); +} + +export function resolveModelAlias(modelId: string): string { + for (const [canonical, spec] of Object.entries(MODEL_SPECS)) { + if (spec.aliases?.includes(modelId)) return canonical; + } + return modelId; +} diff --git a/src/shared/constants/pricing.ts b/src/shared/constants/pricing.ts index 399de81d..aa7af829 100644 --- a/src/shared/constants/pricing.ts +++ b/src/shared/constants/pricing.ts @@ -102,6 +102,21 @@ export const DEFAULT_PRICING = { reasoning: 30.0, cache_creation: 5.0, }, + // T12: fallback pricing for gpt-5.4 mini variants + "gpt-5.4-mini": { + input: 1.5, + output: 6.0, + cached: 0.75, + reasoning: 9.0, + cache_creation: 1.5, + }, + "gpt5.4-mini": { + input: 1.5, + output: 6.0, + cached: 0.75, + reasoning: 9.0, + cache_creation: 1.5, + }, // GPT 5.3 Codex family (all same pricing tier) "gpt-5.3-codex": GPT_5_3_CODEX_PRICING, "gpt-5.3-codex-xhigh": GPT_5_3_CODEX_PRICING, @@ -183,6 +198,13 @@ export const DEFAULT_PRICING = { reasoning: 4.5, cache_creation: 0.5, }, + "gemini-3.1-flash-lite-preview": { + input: 0.5, + output: 3.0, + cached: 0.03, + reasoning: 4.5, + 
cache_creation: 0.5, + }, "gemini-3-pro-preview": { input: 2.0, output: 12.0, @@ -197,6 +219,20 @@ export const DEFAULT_PRICING = { reasoning: 18.0, cache_creation: 2.0, }, + "gemini-3-flash-preview": { + input: 0.5, + output: 3.0, + cached: 0.03, + reasoning: 4.5, + cache_creation: 0.5, + }, + "gemini-3.1-flash-lite-preview": { + input: 0.5, + output: 3.0, + cached: 0.03, + reasoning: 4.5, + cache_creation: 0.5, + }, "gemini-2.5-pro": { input: 2.0, output: 12.0, @@ -707,11 +743,11 @@ export const DEFAULT_PRICING = { // GLM glm: { "glm-5": { - input: 1.0, - output: 3.2, - cached: 0.5, - reasoning: 4.8, - cache_creation: 1.0, + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, }, "glm-5-turbo": { input: 1.2, @@ -721,11 +757,11 @@ export const DEFAULT_PRICING = { cache_creation: 1.2, }, "glm-4.7": { - input: 0.75, - output: 3.0, - cached: 0.375, - reasoning: 4.5, - cache_creation: 0.75, + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, }, "glm-4.6": { input: 0.5, @@ -761,6 +797,20 @@ export const DEFAULT_PRICING = { reasoning: 4.5, cache_creation: 0.6, }, + "kimi-k2.5-thinking": { + input: 0.6, + output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, + "kimi-for-coding": { + input: 0.6, + output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, "moonshot-kimi-k2.5": { input: 0.6, output: 3.0, @@ -770,6 +820,30 @@ export const DEFAULT_PRICING = { }, }, + // Kimi Coding aliases (OAuth/API key) + kmc: { + "kimi-k2.5": { input: 0.6, output: 3.0, cached: 0.3, reasoning: 4.5, cache_creation: 0.6 }, + "kimi-k2.5-thinking": { + input: 0.6, + output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, + "kimi-latest": { input: 1.0, output: 4.0, cached: 0.5, reasoning: 6.0, cache_creation: 1.0 }, + }, + kmca: { + "kimi-k2.5": { input: 0.6, output: 3.0, cached: 0.3, reasoning: 4.5, cache_creation: 0.6 }, + "kimi-k2.5-thinking": { + input: 0.6, + 
output: 3.0, + cached: 0.3, + reasoning: 4.5, + cache_creation: 0.6, + }, + "kimi-latest": { input: 1.0, output: 4.0, cached: 0.5, reasoning: 6.0, cache_creation: 1.0 }, + }, + // MiniMax minimax: { "minimax-m2.1": { @@ -789,18 +863,18 @@ export const DEFAULT_PRICING = { // MiniMax M2.5 — mais barato que M2.1, reasoning + tools // Context: 204.800 tokens | Max Output: 16.384 tokens "minimax-m2.5": { - input: 0.3, - output: 1.2, - cached: 0.15, - reasoning: 1.8, - cache_creation: 0.3, + input: 0.27, + output: 0.95, + cached: 0.135, + reasoning: 1.425, + cache_creation: 0.27, }, "MiniMax-M2.5": { - input: 0.3, - output: 1.2, - cached: 0.15, - reasoning: 1.8, - cache_creation: 0.3, + input: 0.27, + output: 0.95, + cached: 0.135, + reasoning: 1.425, + cache_creation: 0.27, }, // T12: MiniMax M2.7 — new default model (sub2api PR #1120) // Upgraded from M2.5, same API endpoint api.minimax.io @@ -1107,11 +1181,11 @@ export const DEFAULT_PRICING = { // ───────────────────────────────────────────────────────────────────── zai: { "glm-5": { - input: 1.0, - output: 3.2, - cached: 0.5, - reasoning: 4.8, - cache_creation: 1.0, + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, }, "glm-5-turbo": { input: 1.2, @@ -1120,6 +1194,13 @@ export const DEFAULT_PRICING = { reasoning: 6.0, cache_creation: 1.2, }, + "glm-4.7": { + input: 0.38, + output: 1.98, + cached: 0.19, + reasoning: 2.97, + cache_creation: 0.38, + }, }, kiro: { diff --git a/src/shared/services/cliRuntime.ts b/src/shared/services/cliRuntime.ts index bbfd706c..b8ef8a90 100644 --- a/src/shared/services/cliRuntime.ts +++ b/src/shared/services/cliRuntime.ts @@ -98,7 +98,7 @@ const CLI_TOOLS: Record = { // opencode takes several seconds on cold start environments healthcheckTimeoutMs: 15000, paths: { - config: ".config/opencode/config.toml", + config: ".config/opencode/opencode.json", }, }, }; @@ -337,9 +337,39 @@ export const ensureCliConfigWriteAllowed = () => { export const 
getCliConfigHome = () => String(process.env.CLI_CONFIG_HOME || "").trim() || os.homedir(); +export const resolveOpencodeConfigDir = ( + platform = process.platform, + env: NodeJS.ProcessEnv = process.env, + homeDir = os.homedir() +) => { + const isWin = platform === "win32"; + if (isWin) { + const appData = String(env.APPDATA || "").trim(); + return appData || path.join(homeDir, "AppData", "Roaming"); + } + + const xdgConfigHome = String(env.XDG_CONFIG_HOME || "").trim(); + return xdgConfigHome || path.join(homeDir, ".config"); +}; + +export const resolveOpencodeConfigPath = ( + platform = process.platform, + env: NodeJS.ProcessEnv = process.env, + homeDir = os.homedir() +) => path.join(resolveOpencodeConfigDir(platform, env, homeDir), "opencode", "opencode.json"); + +export const getOpenCodeConfigPath = () => resolveOpencodeConfigPath(); + export const getCliConfigPaths = (toolId: string) => { const tool = CLI_TOOLS[toolId]; if (!tool) return null; + + if (toolId === "opencode") { + return { + config: getOpenCodeConfigPath(), + }; + } + const home = getCliConfigHome(); return Object.fromEntries( Object.entries(tool.paths).map(([key, relativePath]) => [ diff --git a/src/shared/services/opencodeConfig.ts b/src/shared/services/opencodeConfig.ts new file mode 100644 index 00000000..da4305ea --- /dev/null +++ b/src/shared/services/opencodeConfig.ts @@ -0,0 +1,64 @@ +type OpenCodeConfigInput = { + baseUrl?: string; + apiKey?: string; + model?: string; +}; + +type OpenCodeProviderConfig = { + name: string; + api: "openai"; + baseURL: string; + apiKey: string; + models: string[]; +}; + +const OPENCODE_DEFAULT_MODELS = [ + "claude-opus-4-5-thinking", + "claude-sonnet-4-5-thinking", + "gemini-3.1-pro-high", + "gemini-3-flash", +] as const; + +const normalizeValue = (value: unknown) => + String(value || "") + .trim() + .replace(/^\/+/, ""); + +export const buildOpenCodeProviderConfig = ({ + baseUrl, + apiKey, + model, +}: OpenCodeConfigInput): OpenCodeProviderConfig => { + 
const normalizedBaseUrl = String(baseUrl || "") + .trim() + .replace(/\/+$/, ""); + const normalizedModel = normalizeValue(model); + + const uniqueModels = [...new Set([normalizedModel, ...OPENCODE_DEFAULT_MODELS].filter(Boolean))]; + + return { + name: "OmniRoute", + api: "openai", + baseURL: normalizedBaseUrl, + apiKey: apiKey || "sk_omniroute", + models: uniqueModels, + }; +}; + +export const mergeOpenCodeConfig = ( + existingConfig: Record<string, unknown> | null | undefined, + input: OpenCodeConfigInput +) => { + const safeConfig = + existingConfig && typeof existingConfig === "object" && !Array.isArray(existingConfig) + ? existingConfig + : {}; + + return { + ...safeConfig, + providers: { + ...((safeConfig as any).providers || {}), + omniroute: buildOpenCodeProviderConfig(input), + }, + }; +}; diff --git a/src/shared/utils/apiKeyPolicy.ts b/src/shared/utils/apiKeyPolicy.ts index a261f1a7..211a074e 100644 --- a/src/shared/utils/apiKeyPolicy.ts +++ b/src/shared/utils/apiKeyPolicy.ts @@ -37,6 +37,7 @@ export interface ApiKeyMetadata { accessSchedule?: AccessSchedule | null; maxRequestsPerDay?: number | null; maxRequestsPerMinute?: number | null; + maxSessions?: number | null; } /** diff --git a/src/shared/validation/schemas.ts b/src/shared/validation/schemas.ts index 70d87421..a6a6d3a1 100644 --- a/src/shared/validation/schemas.ts +++ b/src/shared/validation/schemas.ts @@ -905,6 +905,7 @@ export const updateKeyPermissionsSchema = z noLog: z.boolean().optional(), autoResolve: z.boolean().optional(), isActive: z.boolean().optional(), + maxSessions: z.number().int().min(0).max(10000).optional(), accessSchedule: z.union([accessScheduleSchema, z.null()]).optional(), }) .superRefine((value, ctx) => { @@ -915,6 +916,7 @@ export const updateKeyPermissionsSchema = z value.noLog === undefined && value.autoResolve === undefined && value.isActive === undefined && + value.maxSessions === undefined && value.accessSchedule === undefined ) { ctx.addIssue({ @@ -1028,6 +1030,7 @@ export const 
providersBatchTestSchema = z export const validateProviderApiKeySchema = z.object({ provider: z.string().trim().min(1, "Provider and API key required"), apiKey: z.string().trim().min(1, "Provider and API key required"), + validationModelId: z.string().trim().optional(), }); const geminiPartSchema = z diff --git a/src/sse/handlers/chat.ts b/src/sse/handlers/chat.ts index e7339d7c..4ffdf46b 100644 --- a/src/sse/handlers/chat.ts +++ b/src/sse/handlers/chat.ts @@ -46,6 +46,14 @@ import { applyTaskAwareRouting, getTaskRoutingConfig, } from "@omniroute/open-sse/services/taskAwareRouter.ts"; +import { + generateSessionId as generateStableSessionId, + touchSession, + extractExternalSessionId, + checkSessionLimit, + registerKeySession, + isSessionRegisteredForKey, +} from "@omniroute/open-sse/services/sessionManager.ts"; import { isFallbackDecision, shouldUseFallback, @@ -161,6 +169,13 @@ export async function handleChat(request: any, clientRawRequest: any = null) { return errorResponse(HTTP_STATUS.BAD_REQUEST, "Missing model"); } + // T04: client-provided external session header has priority over generated fingerprint. + const externalSessionId = extractExternalSessionId(request.headers); + const sessionId = externalSessionId || generateStableSessionId(body); + if (sessionId) { + touchSession(sessionId); + } + // Pipeline: API key policy enforcement (model restrictions + budget limits) telemetry.startPhase("policy"); const policy = await enforceApiKeyPolicy(request, modelStr); @@ -174,6 +189,25 @@ export async function handleChat(request: any, clientRawRequest: any = null) { const apiKeyInfo = policy.apiKeyInfo; telemetry.endPhase(); + // T08: per-key active session limit (0 = unlimited). + if (apiKeyInfo?.id && sessionId) { + const maxSessions = + typeof apiKeyInfo.maxSessions === "number" && apiKeyInfo.maxSessions > 0 + ? 
apiKeyInfo.maxSessions + : 0; + + if (maxSessions > 0 && !isSessionRegisteredForKey(apiKeyInfo.id, sessionId)) { + const sessionViolation = checkSessionLimit(apiKeyInfo.id, maxSessions); + if (sessionViolation) { + return withSessionHeader( + errorResponse(HTTP_STATUS.RATE_LIMITED, sessionViolation.message), + sessionId + ); + } + registerKeySession(apiKeyInfo.id, sessionId); + } + } + // T05 — Task-Aware Smart Routing // Detect the semantic task type and optionally route to the optimal model let resolvedModelStr = modelStr; @@ -221,7 +255,8 @@ export async function handleChat(request: any, clientRawRequest: any = null) { const creds = await getProviderCredentials( provider, null, - apiKeyInfo?.allowedConnections ?? null + apiKeyInfo?.allowedConnections ?? null, + modelInfo.model || modelString ); if (!creds || creds.allRateLimited) return false; return true; @@ -238,7 +273,9 @@ export async function handleChat(request: any, clientRawRequest: any = null) { body, combo, handleSingleModel: (b: any, m: string) => - handleSingleModelChat(b, m, clientRawRequest, request, combo.name, apiKeyInfo, telemetry), + handleSingleModelChat(b, m, clientRawRequest, request, combo.name, apiKeyInfo, telemetry, { + sessionId, + }), isModelAvailable: checkModelAvailable, log, settings, @@ -247,7 +284,7 @@ export async function handleChat(request: any, clientRawRequest: any = null) { // Record telemetry recordTelemetry(telemetry); - return response; + return withSessionHeader(response, sessionId); } telemetry.endPhase(); @@ -259,10 +296,11 @@ export async function handleChat(request: any, clientRawRequest: any = null) { request, null, apiKeyInfo, - telemetry + telemetry, + { sessionId } ); recordTelemetry(telemetry); - return response; + return withSessionHeader(response, sessionId); } /** @@ -280,7 +318,7 @@ async function handleSingleModelChat( comboName: string | null = null, apiKeyInfo: any = null, telemetry: any = null, - runtimeOptions: { emergencyFallbackTried?: boolean } = {} + 
runtimeOptions: { emergencyFallbackTried?: boolean; sessionId?: string | null } = {} ) { // 1. Resolve model → provider/model const resolved = await resolveModelOrError(modelStr, body); @@ -310,7 +348,8 @@ async function handleSingleModelChat( const credentials = await getProviderCredentials( provider, excludeConnectionId, - apiKeyInfo?.allowedConnections ?? null + apiKeyInfo?.allowedConnections ?? null, + model ); if (!credentials || credentials.allRateLimited) { @@ -333,6 +372,9 @@ async function handleSingleModelChat( const accountId = credentials.connectionId.slice(0, 8); log.info("AUTH", `Using ${provider} account: ${accountId}...`); + if (runtimeOptions.sessionId) { + touchSession(runtimeOptions.sessionId, credentials.connectionId); + } const refreshedCredentials = await checkAndRefreshToken(provider, credentials); const proxyInfo = await safeResolveProxy(credentials.connectionId); @@ -604,6 +646,23 @@ async function executeChatWithBreaker({ tlsFingerprintUsed: false, }; } + + // T14: Proxy Fast-Fail should be converted into an upstream-unavailable result + // so account fallback logic can continue with another connection. 
+ if (cbErr?.code === "PROXY_UNREACHABLE" || /proxy unreachable/i.test(cbErr?.message || "")) { + const detail = cbErr?.message || "Proxy unreachable"; + log.warn("PROXY", detail); + return { + result: { + success: false, + response: (unavailableResponse as any)(HTTP_STATUS.SERVICE_UNAVAILABLE, detail, 2), + status: HTTP_STATUS.SERVICE_UNAVAILABLE, + error: detail, + }, + tlsFingerprintUsed: false, + }; + } + throw cbErr; } } @@ -710,3 +769,20 @@ function safeLogEvents({ }); } catch {} } + +function withSessionHeader(response: Response, sessionId: string | null): Response { + if (!response || !sessionId) return response; + + try { + response.headers.set("X-OmniRoute-Session-Id", sessionId); + return response; + } catch { + const cloned = new Response(response.body, { + status: response.status, + statusText: response.statusText, + headers: response.headers, + }); + cloned.headers.set("X-OmniRoute-Session-Id", sessionId); + return cloned; + } +} diff --git a/src/sse/services/auth.ts b/src/sse/services/auth.ts index 97ca748d..757bff7d 100644 --- a/src/sse/services/auth.ts +++ b/src/sse/services/auth.ts @@ -16,6 +16,7 @@ import { } from "@omniroute/open-sse/services/accountFallback.ts"; import { isLocalProvider } from "@omniroute/open-sse/config/providerRegistry.ts"; import { COOLDOWN_MS } from "@omniroute/open-sse/config/constants.ts"; +import { getCodexModelScope } from "@omniroute/open-sse/executors/codex.ts"; import * as log from "../utils/logger"; import { fisherYatesShuffle, getNextFromDeckSync } from "@/shared/utils/shuffleDeck"; @@ -166,6 +167,56 @@ function applyCodexWindowPolicy(rawWindows: string[], providerSpecificData: Json return uniqueWindows(windows); } +function getCodexScopeRateLimitedUntil( + providerSpecificData: JsonRecord, + model: string | null +): string | null { + if (!model) return null; + const scope = getCodexModelScope(model); + const scopeMap = asRecord(providerSpecificData.codexScopeRateLimitedUntil); + const value = scopeMap[scope]; + 
return typeof value === "string" && value.trim().length > 0 ? value : null; +} + +function isCodexScopeUnavailable( + connection: ProviderConnectionView, + model: string | null +): boolean { + const until = getCodexScopeRateLimitedUntil(connection.providerSpecificData, model); + if (!until) return false; + return new Date(until).getTime() > Date.now(); +} + +function getEarliestCodexScopeRateLimitedUntil( + connections: ProviderConnectionView[], + model: string | null +): string | null { + let earliest: string | null = null; + let earliestMs = Infinity; + + for (const conn of connections) { + const until = getCodexScopeRateLimitedUntil(conn.providerSpecificData, model); + if (!until) continue; + const ms = new Date(until).getTime(); + if (!Number.isFinite(ms) || ms <= Date.now()) continue; + if (ms < earliestMs) { + earliest = until; + earliestMs = ms; + } + } + + return earliest; +} + +function normalizeStatus(value: string | null): string { + return (value || "").trim().toLowerCase(); +} + +function isTerminalConnectionStatus(connection: ProviderConnectionView): boolean { + const status = normalizeStatus(connection.testStatus); + return status === "credits_exhausted" || status === "banned" || status === "expired"; +} + export function resolveQuotaLimitPolicy( provider: string, providerSpecificData: JsonRecord @@ -259,7 +310,8 @@ export { fisherYatesShuffle, getNextFromDeckSync as getNextFromDeck }; export async function getProviderCredentials( provider: string, excludeConnectionId: string | null = null, - allowedConnections: string[] | null = null + allowedConnections: string[] | null = null, + requestedModel: string | null = null ) { // Acquire mutex to prevent race conditions const currentMutex = selectionMutex; @@ -320,6 +372,8 @@ export async function getProviderCredentials( const availableConnections = connections.filter((c) => { if (excludeConnectionId && c.id === excludeConnectionId) return false; if (isAccountUnavailable(c.rateLimitedUntil)) return false; 
+ if (isTerminalConnectionStatus(c)) return false; + if (provider === "codex" && isCodexScopeUnavailable(c, requestedModel)) return false; return true; }); @@ -330,16 +384,27 @@ export async function getProviderCredentials( connections.forEach((c) => { const excluded = excludeConnectionId && c.id === excludeConnectionId; const rateLimited = isAccountUnavailable(c.rateLimitedUntil); + const terminalStatus = isTerminalConnectionStatus(c); + const codexScopeLimited = provider === "codex" && isCodexScopeUnavailable(c, requestedModel); if (excluded || rateLimited) { log.debug( "AUTH", ` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}` ); + } else if (terminalStatus) { + log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | skipped terminal status=${c.testStatus}`); + } else if (codexScopeLimited) { + const scopeUntil = getCodexScopeRateLimitedUntil(c.providerSpecificData, requestedModel); + log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | codex scope-limited until ${scopeUntil}`); } }); if (availableConnections.length === 0) { - const earliest = getEarliestRateLimitedUntil(connections); + const earliest = + getEarliestRateLimitedUntil(connections) || + (provider === "codex" + ? getEarliestCodexScopeRateLimitedUntil(connections, requestedModel) + : null); if (earliest) { // Find the connection with the earliest rateLimitedUntil to get its error info const rateLimitedConns = connections.filter( @@ -618,6 +683,15 @@ export async function markAccountUnavailable( const conn = connections.find((connection) => connection.id === connectionId); const backoffLevel = conn?.backoffLevel || 0; + // T06/T10/T36: terminal statuses should not be overwritten by transient cooldown state. 
+ if (conn && isTerminalConnectionStatus(conn)) { + log.info( + "AUTH", + `${connectionId.slice(0, 8)} terminal status=${conn.testStatus}, skipping cooldown overwrite` + ); + return { shouldFallback: true, cooldownMs: 0 }; + } + // ─── Anti-Thundering Herd Guard ───────────────────────────────── // If this connection was ALREADY marked unavailable by a prior concurrent // request (within the mutex window), skip re-marking to avoid resetting @@ -633,6 +707,24 @@ export async function markAccountUnavailable( }; } + // T09: Codex scope-aware lockout guard (codex vs spark independent pools). + if (provider === "codex" && model) { + const scopeRateLimitedUntil = getCodexScopeRateLimitedUntil( + conn?.providerSpecificData || {}, + model + ); + if (scopeRateLimitedUntil && new Date(scopeRateLimitedUntil).getTime() > Date.now()) { + log.info( + "AUTH", + `${connectionId.slice(0, 8)} already scope-limited for ${getCodexModelScope(model)} (until ${scopeRateLimitedUntil}), skipping duplicate mark` + ); + return { + shouldFallback: true, + cooldownMs: new Date(scopeRateLimitedUntil).getTime() - Date.now(), + }; + } + } + const { shouldFallback, cooldownMs, newBackoffLevel, reason } = checkFallbackError( status, errorText, @@ -662,6 +754,40 @@ export async function markAccountUnavailable( const rateLimitedUntil = getUnavailableUntil(cooldownMs); const errorMsg = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error"; + // T09: Codex per-scope lockout (do not block the whole account globally). 
+ if (provider === "codex" && status === 429 && model && conn) { + const scope = getCodexModelScope(model); + const existingScopeMap = asRecord(conn.providerSpecificData.codexScopeRateLimitedUntil); + const persistedScopeUntil = getCodexScopeRateLimitedUntil(conn.providerSpecificData, model); + const scopeRateLimitedUntil = persistedScopeUntil || rateLimitedUntil; + const scopeCooldownMs = Math.max(new Date(scopeRateLimitedUntil).getTime() - Date.now(), 0); + + await updateProviderConnection(connectionId, { + testStatus: "unavailable", + lastError: errorMsg, + errorCode: status, + lastErrorAt: new Date().toISOString(), + backoffLevel: newBackoffLevel ?? backoffLevel, + providerSpecificData: { + ...conn.providerSpecificData, + codexScopeRateLimitedUntil: { + ...existingScopeMap, + [scope]: scopeRateLimitedUntil, + }, + }, + }); + + if (scopeCooldownMs > 0) { + lockModel(provider, connectionId, model, reason || "unknown", scopeCooldownMs); + } + + if (status && errorMsg) { + console.error(`❌ ${provider} [${status}] (${scope}): ${errorMsg}`); + } + + return { shouldFallback: true, cooldownMs: scopeCooldownMs }; + } + await updateProviderConnection(connectionId, { rateLimitedUntil, testStatus: "unavailable", diff --git a/tests/unit/background-task-detector.test.mjs b/tests/unit/background-task-detector.test.mjs index b483b155..bfaa9731 100644 --- a/tests/unit/background-task-detector.test.mjs +++ b/tests/unit/background-task-detector.test.mjs @@ -3,6 +3,7 @@ import assert from "node:assert/strict"; const { isBackgroundTask, + getBackgroundTaskReason, getDegradedModel, setBackgroundDegradationConfig, getBackgroundDegradationConfig, @@ -68,6 +69,26 @@ test("isBackgroundTask: detects X-Request-Priority header", () => { assert.equal(isBackgroundTask(body, headers), true); }); +test("isBackgroundTask: detects X-Task-Type header", () => { + const body = { + model: "claude-sonnet-4", + messages: [{ role: "user", content: "hello" }], + }; + const headers = { "x-task-type": 
"background" }; + assert.equal(isBackgroundTask(body, headers), true); + assert.equal(getBackgroundTaskReason(body, headers), "header_background"); +}); + +test("isBackgroundTask: detects low max_tokens requests", () => { + const body = { + model: "claude-sonnet-4", + max_tokens: 32, + messages: [{ role: "user", content: "hello" }], + }; + assert.equal(isBackgroundTask(body), true); + assert.equal(getBackgroundTaskReason(body), "low_max_tokens"); +}); + test("isBackgroundTask: returns false for null/undefined body", () => { assert.equal(isBackgroundTask(null), false); assert.equal(isBackgroundTask(undefined), false); @@ -81,8 +102,8 @@ test("isBackgroundTask: returns false for empty messages", () => { test("getDegradedModel: returns cheaper model from map", () => { resetStats(); - assert.equal(getDegradedModel("claude-opus-4-6"), "gemini-2.5-flash"); - assert.equal(getDegradedModel("gemini-2.5-pro"), "gemini-2.5-flash"); + assert.equal(getDegradedModel("claude-opus-4-6"), "gemini-3-flash"); + assert.equal(getDegradedModel("gemini-2.5-pro"), "gemini-3-flash"); assert.equal(getDegradedModel("gpt-4o"), "gpt-4o-mini"); }); diff --git a/tests/unit/call-logs-requested-model.test.mjs b/tests/unit/call-logs-requested-model.test.mjs new file mode 100644 index 00000000..c94b55f4 --- /dev/null +++ b/tests/unit/call-logs-requested-model.test.mjs @@ -0,0 +1,52 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const TEST_DATA_DIR = fs.mkdtempSync(path.join(os.tmpdir(), "omniroute-calllogs-rm-")); +process.env.DATA_DIR = TEST_DATA_DIR; + +const core = await import("../../src/lib/db/core.ts"); +const callLogs = await import("../../src/lib/usage/callLogs.ts"); + +async function resetStorage() { + core.resetDbInstance(); + fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true }); + fs.mkdirSync(TEST_DATA_DIR, { recursive: true }); +} + +test.beforeEach(async () => { + 
await resetStorage(); +}); + +test.after(() => { + core.resetDbInstance(); + fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true }); +}); + +test("call logs persist requestedModel and allow filtering by requested model", async () => { + await callLogs.saveCallLog({ + method: "POST", + path: "/v1/chat/completions", + status: 200, + model: "openai/gpt-5.2-mini", + requestedModel: "openai/gpt-5.2-codex", + provider: "openai", + duration: 123, + requestBody: { messages: [{ role: "user", content: "hello" }] }, + responseBody: { id: "resp_1" }, + }); + + const all = await callLogs.getCallLogs({ limit: 10 }); + assert.equal(all.length, 1); + assert.equal(all[0].model, "openai/gpt-5.2-mini"); + assert.equal(all[0].requestedModel, "openai/gpt-5.2-codex"); + + const byRequested = await callLogs.getCallLogs({ model: "gpt-5.2-codex", limit: 10 }); + assert.equal(byRequested.length, 1); + assert.equal(byRequested[0].requestedModel, "openai/gpt-5.2-codex"); + + const detail = await callLogs.getCallLogById(all[0].id); + assert.equal(detail?.requestedModel, "openai/gpt-5.2-codex"); +}); diff --git a/tests/unit/fixes-p1.test.mjs b/tests/unit/fixes-p1.test.mjs index a672ab31..636543b0 100644 --- a/tests/unit/fixes-p1.test.mjs +++ b/tests/unit/fixes-p1.test.mjs @@ -3,6 +3,7 @@ import assert from "node:assert/strict"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; +import net from "node:net"; const isWindows = process.platform === "win32"; const TEST_DATA_DIR = fs.mkdtempSync(path.join(os.tmpdir(), "omniroute-fixes-")); @@ -342,11 +343,29 @@ test("proxy fetch rejects socks5 context when feature flag is disabled", async ( test("proxy fetch accepts socks5 context when feature flag is enabled", async () => { await withEnv("ENABLE_SOCKS5_PROXY", "true", async () => { - const result = await proxyFetch.runWithProxyContext( - { type: "socks5", host: "127.0.0.1", port: "1080" }, - async () => "ok" - ); - assert.equal(result, "ok"); + const server = 
net.createServer(); + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", resolve); + }); + + const address = server.address(); + assert.ok(address && typeof address === "object"); + + try { + const result = await proxyFetch.runWithProxyContext( + { type: "socks5", host: "127.0.0.1", port: String(address.port) }, + async () => "ok" + ); + assert.equal(result, "ok"); + } finally { + await new Promise((resolve, reject) => { + server.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); + } }); }); diff --git a/tests/unit/openai-to-claude-strip-empty.test.mjs b/tests/unit/openai-to-claude-strip-empty.test.mjs index 497948c1..b139853c 100644 --- a/tests/unit/openai-to-claude-strip-empty.test.mjs +++ b/tests/unit/openai-to-claude-strip-empty.test.mjs @@ -1,7 +1,7 @@ import test from "node:test"; import assert from "node:assert/strict"; -const { stripEmptyTextBlocks, openaiToClaudeRequest } = +const { stripEmptyTextBlocks, openaiToClaudeRequest, normalizeContentToString } = await import("../../open-sse/translator/request/openai-to-claude.ts"); test("stripEmptyTextBlocks removes empty text recursively inside tool_result content", () => { @@ -74,3 +74,34 @@ test("openaiToClaudeRequest applies strip to tool message array content", () => const toolResult = toolMessage.content.find((b) => b.type === "tool_result"); assert.deepEqual(toolResult.content, [{ type: "text", text: "tool ok" }]); }); + +test("T15: normalizeContentToString supports array-form content blocks", () => { + const text = normalizeContentToString([ + { type: "text", text: "line 1" }, + { type: "image_url", image_url: { url: "data:image/png;base64,abc" } }, + { type: "text", text: "line 2" }, + ]); + + assert.equal(text, "line 1\nline 2"); +}); + +test("T15: openaiToClaudeRequest converts system array content into a Claude system text block", () => { + const request = { + messages: [ + { + role: "system", + content: [ + { type: "text", 
text: "System rules A" }, + { type: "image_url", image_url: { url: "data:image/png;base64,abc" } }, + { type: "text", text: "System rules B" }, + ], + }, + { role: "user", content: "hello" }, + ], + }; + + const translated = openaiToClaudeRequest("claude-sonnet-4", request, false); + assert.ok(Array.isArray(translated.system)); + // system[0] is the injected Claude prompt; user-provided system content is system[1]. + assert.equal(translated.system[1].text, "System rules A\nSystem rules B"); +}); diff --git a/tests/unit/t07-no-log-key-config.test.mjs b/tests/unit/t07-no-log-key-config.test.mjs index a368e5c0..9eeaf922 100644 --- a/tests/unit/t07-no-log-key-config.test.mjs +++ b/tests/unit/t07-no-log-key-config.test.mjs @@ -59,6 +59,11 @@ test("updateKeyPermissionsSchema accepts noLog-only updates and rejects empty pa const noLogOnly = schemas.validateBody(schemas.updateKeyPermissionsSchema, { noLog: true }); assert.equal(noLogOnly.success, true); + const maxSessionsOnly = schemas.validateBody(schemas.updateKeyPermissionsSchema, { + maxSessions: 3, + }); + assert.equal(maxSessionsOnly.success, true); + const emptyPayload = schemas.validateBody(schemas.updateKeyPermissionsSchema, {}); assert.equal(emptyPayload.success, false); }); diff --git a/tests/unit/t12-pricing-updates.test.mjs b/tests/unit/t12-pricing-updates.test.mjs new file mode 100644 index 00000000..e9e511d4 --- /dev/null +++ b/tests/unit/t12-pricing-updates.test.mjs @@ -0,0 +1,34 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { getDefaultPricing } from "../../src/shared/constants/pricing.ts"; +import { REGISTRY } from "../../open-sse/config/providerRegistry.ts"; + +test("T12: pricing table includes MiniMax, GLM, Kimi and gpt-5.4 mini entries", () => { + const pricing = getDefaultPricing(); + + assert.ok(pricing.cx["gpt-5.4"], "missing cx/gpt-5.4"); + assert.ok(pricing.cx["gpt-5.4-mini"], "missing cx/gpt-5.4-mini"); + + assert.ok(pricing.minimax["minimax-m2.5"], 
"missing minimax/minimax-m2.5"); + assert.ok(pricing.minimax["minimax-m2.7"], "missing minimax/minimax-m2.7"); + assert.equal(pricing.minimax["minimax-m2.5"].input, 0.27); + assert.equal(pricing.minimax["minimax-m2.5"].output, 0.95); + + assert.ok(pricing.glm["glm-4.7"], "missing glm/glm-4.7"); + assert.ok(pricing.glm["glm-5"], "missing glm/glm-5"); + assert.equal(pricing.glm["glm-4.7"].input, 0.38); + assert.equal(pricing.glm["glm-4.7"].output, 1.98); + + assert.ok(pricing.kimi["kimi-k2.5"], "missing kimi/kimi-k2.5"); + assert.ok(pricing.kimi["kimi-k2.5-thinking"], "missing kimi/kimi-k2.5-thinking"); + assert.ok(pricing.kimi["kimi-for-coding"], "missing kimi/kimi-for-coding"); +}); + +test("T12: minimax default model list starts with M2.7", () => { + const minimaxModels = REGISTRY.minimax.models.map((m) => m.id); + const minimaxCnModels = REGISTRY["minimax-cn"].models.map((m) => m.id); + + assert.equal(minimaxModels[0], "minimax-m2.7"); + assert.equal(minimaxCnModels[0], "minimax-m2.7"); +}); diff --git a/tests/unit/t13-stale-quota-display.test.mjs b/tests/unit/t13-stale-quota-display.test.mjs new file mode 100644 index 00000000..57f5d144 --- /dev/null +++ b/tests/unit/t13-stale-quota-display.test.mjs @@ -0,0 +1,31 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { parseQuotaData } from "../../src/app/(dashboard)/dashboard/usage/components/ProviderLimits/utils.tsx"; + +test("T13: parseQuotaData zeroes usage when resetAt is already in the past", () => { + const past = new Date(Date.now() - 60_000).toISOString(); + const parsed = parseQuotaData("codex", { + quotas: { + session: { used: 83, total: 100, resetAt: past }, + }, + }); + + assert.equal(parsed.length, 1); + assert.equal(parsed[0].used, 0); + assert.equal(parsed[0].staleAfterReset, true); + assert.equal(parsed[0].remainingPercentage, 100); +}); + +test("T13: parseQuotaData keeps usage unchanged when resetAt is in the future", () => { + const future = new Date(Date.now() + 
60_000).toISOString(); + const parsed = parseQuotaData("codex", { + quotas: { + session: { used: 42, total: 100, resetAt: future }, + }, + }); + + assert.equal(parsed.length, 1); + assert.equal(parsed[0].used, 42); + assert.equal(parsed[0].staleAfterReset, false); +}); diff --git a/tests/unit/t14-proxy-fast-fail.test.mjs b/tests/unit/t14-proxy-fast-fail.test.mjs new file mode 100644 index 00000000..20a8b113 --- /dev/null +++ b/tests/unit/t14-proxy-fast-fail.test.mjs @@ -0,0 +1,35 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { + isProxyReachable, + getCachedProxyHealth, + invalidateProxyHealth, +} from "../../src/lib/proxyHealth.ts"; +import { runWithProxyContext } from "../../open-sse/utils/proxyFetch.ts"; + +test("T14: isProxyReachable caches unreachable proxy result", async () => { + const proxyUrl = "http://127.0.0.1:1"; + invalidateProxyHealth(proxyUrl); + + const healthy = await isProxyReachable(proxyUrl, 120, 2_000); + assert.equal(healthy, false); + assert.equal(getCachedProxyHealth(proxyUrl), false); +}); + +test("T14: runWithProxyContext fast-fails when proxy is unreachable", async () => { + const proxyUrl = "http://127.0.0.1:1"; + invalidateProxyHealth(proxyUrl); + + let executed = false; + await assert.rejects( + () => + runWithProxyContext(proxyUrl, async () => { + executed = true; + return "ok"; + }), + (err) => err?.code === "PROXY_UNREACHABLE" + ); + + assert.equal(executed, false); +}); diff --git a/tests/unit/t16-gemini-enum-type-string.test.mjs b/tests/unit/t16-gemini-enum-type-string.test.mjs new file mode 100644 index 00000000..c921ee52 --- /dev/null +++ b/tests/unit/t16-gemini-enum-type-string.test.mjs @@ -0,0 +1,53 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { cleanJSONSchemaForAntigravity } = + await import("../../open-sse/translator/helpers/geminiHelper.ts"); + +test("T16: enum-only fields gain type:string after Gemini schema cleanup", () => { + const schema = 
{ + type: "object", + properties: { + mode: { + enum: ["fast", "balanced", "slow"], + }, + }, + required: ["mode"], + }; + + const cleaned = cleanJSONSchemaForAntigravity(schema); + assert.equal(cleaned.properties.mode.type, "string"); + assert.deepEqual(cleaned.properties.mode.enum, ["fast", "balanced", "slow"]); +}); + +test("T16: existing explicit type:string is preserved", () => { + const schema = { + type: "object", + properties: { + mode: { + type: "string", + enum: ["auto", "manual"], + }, + }, + }; + + const cleaned = cleanJSONSchemaForAntigravity(schema); + assert.equal(cleaned.properties.mode.type, "string"); + assert.deepEqual(cleaned.properties.mode.enum, ["auto", "manual"]); +}); + +test("T16: schemas without enum are not forced to string", () => { + const schema = { + type: "object", + properties: { + retries: { + type: "number", + minimum: 0, + }, + }, + }; + + const cleaned = cleanJSONSchemaForAntigravity(schema); + assert.equal(cleaned.properties.retries.type, "number"); + assert.equal(cleaned.properties.retries.enum, undefined); +}); diff --git a/tests/unit/t19-codex-responses-empty-content.test.mjs b/tests/unit/t19-codex-responses-empty-content.test.mjs new file mode 100644 index 00000000..cc488073 --- /dev/null +++ b/tests/unit/t19-codex-responses-empty-content.test.mjs @@ -0,0 +1,66 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { translateNonStreamingResponse } = + await import("../../open-sse/handlers/responseTranslator.ts"); +const { FORMATS } = await import("../../open-sse/translator/formats.ts"); + +test("T19: picks the last non-empty message content from Responses output", () => { + const responseBody = { + object: "response", + id: "resp_t19", + model: "gpt-5.2-codex", + created_at: 1710000000, + output: [ + { + type: "message", + content: [{ type: "output_text", text: "" }], + }, + { + type: "reasoning", + summary: [{ type: "summary_text", text: "thinking..." 
}], + }, + { + type: "message", + content: [{ type: "output_text", text: "Resposta final" }], + }, + ], + usage: { input_tokens: 10, output_tokens: 5 }, + }; + + const translated = translateNonStreamingResponse( + responseBody, + FORMATS.OPENAI_RESPONSES, + FORMATS.OPENAI + ); + + assert.equal(translated.choices[0].message.content, "Resposta final"); +}); + +test("T19: falls back to last message block when all message texts are empty", () => { + const responseBody = { + object: "response", + id: "resp_t19_empty", + model: "gpt-5.2-codex", + created_at: 1710000001, + output: [ + { + type: "message", + content: [{ type: "output_text", text: "" }], + }, + { + type: "message", + content: [{ type: "output_text", text: "" }], + }, + ], + }; + + const translated = translateNonStreamingResponse( + responseBody, + FORMATS.OPENAI_RESPONSES, + FORMATS.OPENAI + ); + + assert.equal(translated.choices[0].message.content, ""); + assert.equal(translated.choices[0].finish_reason, "stop"); +}); diff --git a/tests/unit/t20-t22-provider-headers.test.mjs b/tests/unit/t20-t22-provider-headers.test.mjs new file mode 100644 index 00000000..58325929 --- /dev/null +++ b/tests/unit/t20-t22-provider-headers.test.mjs @@ -0,0 +1,31 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { platform, arch } from "node:os"; + +const { REGISTRY } = await import("../../open-sse/config/providerRegistry.ts"); + +test("T20: antigravity config has updated User-Agent and sandbox fallback URL", () => { + const antigravity = REGISTRY.antigravity; + assert.ok(Array.isArray(antigravity.baseUrls)); + assert.ok(antigravity.baseUrls.includes("https://daily-cloudcode-pa.sandbox.googleapis.com")); + assert.match( + antigravity.headers["User-Agent"], + new RegExp(`^antigravity/1\\.107\\.0\\s+${platform()}\\/${arch()}$`) + ); +}); + +test("T22: github headers include updated editor/plugin versions and required fields", () => { + const github = REGISTRY.github; + 
assert.equal(github.headers["editor-version"], "vscode/1.110.0"); + assert.equal(github.headers["editor-plugin-version"], "copilot-chat/0.38.0"); + assert.equal(github.headers["user-agent"], "GitHubCopilotChat/0.38.0"); + assert.equal(github.headers["x-github-api-version"], "2025-04-01"); + assert.equal(github.headers["x-vscode-user-agent-library-version"], "electron-fetch"); + assert.equal(github.headers["X-Initiator"], "user"); +}); + +test("T22: github config exposes dedicated responses endpoint", () => { + const github = REGISTRY.github; + assert.equal(github.responsesBaseUrl, "https://api.githubcopilot.com/responses"); + assert.equal(github.baseUrl, "https://api.githubcopilot.com/chat/completions"); +}); diff --git a/tests/unit/t23-t24-fallback-resilience.test.mjs b/tests/unit/t23-t24-fallback-resilience.test.mjs new file mode 100644 index 00000000..55b6e22d --- /dev/null +++ b/tests/unit/t23-t24-fallback-resilience.test.mjs @@ -0,0 +1,141 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { checkFallbackError } = await import("../../open-sse/services/accountFallback.ts"); +const { handleComboChat } = await import("../../open-sse/services/combo.ts"); +const { resetAllCircuitBreakers } = await import("../../src/shared/utils/circuitBreaker.ts"); + +test.beforeEach(() => { + resetAllCircuitBreakers(); +}); + +function createLog() { + const entries = []; + return { + info: (tag, msg) => entries.push({ level: "info", tag, msg }), + warn: (tag, msg) => entries.push({ level: "warn", tag, msg }), + error: (tag, msg) => entries.push({ level: "error", tag, msg }), + entries, + }; +} + +function createStatusSequenceHandler(sequence) { + let idx = 0; + return async () => { + const step = sequence[idx++] || { status: 200 }; + if (step.status === 200) { + return new Response(JSON.stringify({ ok: true }), { status: 200 }); + } + return new Response( + JSON.stringify({ + error: { message: step.message || `Error ${step.status}` }, + }), + { + 
status: step.status, + headers: step.headers || { "content-type": "application/json" }, + } + ); + }; +} + +test("T23: 429 with long Retry-After uses real reset cooldown instead of short exponential backoff", () => { + const headers = new Headers({ "retry-after": "3600" }); + const result = checkFallbackError(429, "Rate limit exceeded", 2, null, "groq", headers); + + assert.equal(result.shouldFallback, true); + assert.equal(result.reason, "rate_limit_exceeded"); + assert.equal(result.newBackoffLevel, 0); + assert.ok(result.cooldownMs > 3_590_000); +}); + +test("T24: combo awaits short 503 cooldown before falling through to next model", async () => { + const log = createLog(); + + const result = await handleComboChat({ + body: {}, + combo: { + name: "t24-short-cooldown", + strategy: "priority", + models: [ + { model: "groq/model-a", weight: 0 }, + { model: "groq/model-b", weight: 0 }, + ], + }, + // Two transient failures on first model, then success on fallback model. + handleSingleModel: createStatusSequenceHandler([ + { status: 503 }, + { status: 503 }, + { status: 200 }, + ]), + isModelAvailable: () => true, + log, + settings: null, + allCombos: null, + }); + + assert.equal(result.ok, true); + const waitLog = log.entries.find((e) => e.msg.includes("Waiting") && e.msg.includes("fallback")); + assert.ok(waitLog); +}); + +test("T24: combo skips wait when 503 cooldown is long (>5s)", async () => { + const log = createLog(); + + const result = await handleComboChat({ + body: {}, + combo: { + name: "t24-long-cooldown", + strategy: "priority", + models: [ + { model: "groq/model-a", weight: 0 }, + { model: "groq/model-b", weight: 0 }, + ], + }, + handleSingleModel: createStatusSequenceHandler([ + { + status: 503, + message: "rate limit exceeded", + headers: { "content-type": "application/json", "retry-after": "120" }, + }, + { + status: 503, + message: "rate limit exceeded", + headers: { "content-type": "application/json", "retry-after": "120" }, + }, + { status: 200 }, 
+ ]), + isModelAvailable: () => true, + log, + settings: null, + allCombos: null, + }); + + assert.equal(result.ok, true); + const waitLog = log.entries.find((e) => e.msg.includes("Waiting") && e.msg.includes("fallback")); + assert.equal(waitLog, undefined); +}); + +test("T24: all inactive accounts return 503 service_unavailable (not 406)", async () => { + const result = await handleComboChat({ + body: {}, + combo: { + name: "t24-all-inactive", + strategy: "priority", + models: [ + { model: "groq/model-a", weight: 0 }, + { model: "groq/model-b", weight: 0 }, + ], + }, + handleSingleModel: async () => { + throw new Error("handleSingleModel should not be called when all models are unavailable"); + }, + isModelAvailable: () => false, + log: createLog(), + settings: null, + allCombos: null, + }); + + assert.equal(result.status, 503); + const body = await result.json(); + assert.equal(body.error?.code, "ALL_ACCOUNTS_INACTIVE"); +}); diff --git a/tests/unit/t25-provider-validation-modelid-fallback.test.mjs b/tests/unit/t25-provider-validation-modelid-fallback.test.mjs new file mode 100644 index 00000000..7912d30d --- /dev/null +++ b/tests/unit/t25-provider-validation-modelid-fallback.test.mjs @@ -0,0 +1,116 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { validateProviderApiKey } = await import("../../src/lib/providers/validation.ts"); + +test("T25: openai-compatible validation succeeds directly when /models works", async () => { + const originalFetch = globalThis.fetch; + const calls = []; + + globalThis.fetch = async (url) => { + calls.push(String(url)); + return new Response(JSON.stringify({ data: [] }), { status: 200 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-models-ok", + apiKey: "sk-test", + providerSpecificData: { baseUrl: "https://api.example.com/v1" }, + }); + + assert.equal(result.valid, true); + assert.equal(result.method, "models_endpoint"); + 
assert.equal(calls.length, 1); + assert.equal(calls[0], "https://api.example.com/v1/models"); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test("T25: /models unavailable without Model ID returns actionable guidance", async () => { + const originalFetch = globalThis.fetch; + let callCount = 0; + + globalThis.fetch = async () => { + callCount += 1; + return new Response(JSON.stringify({ error: "Not Found" }), { status: 404 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-no-model-id", + apiKey: "sk-test", + providerSpecificData: { baseUrl: "https://api.example.com/v1" }, + }); + + assert.equal(result.valid, false); + assert.match(result.error, /Provide a Model ID/i); + // Must stop after /models when no custom model was provided. + assert.equal(callCount, 1); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test("T25: fallback chat probe detects invalid credentials with custom Model ID", async () => { + const originalFetch = globalThis.fetch; + const calls = []; + + globalThis.fetch = async (url) => { + calls.push(String(url)); + if (String(url).endsWith("/models")) { + return new Response(JSON.stringify({ error: "Not Found" }), { status: 404 }); + } + return new Response(JSON.stringify({ error: "Unauthorized" }), { status: 401 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-auth", + apiKey: "bad-key", + providerSpecificData: { + baseUrl: "https://api.example.com/v1", + validationModelId: "grok-3", + }, + }); + + assert.equal(result.valid, false); + assert.equal(result.error, "Invalid API key"); + assert.deepEqual(calls, [ + "https://api.example.com/v1/models", + "https://api.example.com/v1/chat/completions", + ]); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test("T25: fallback chat probe treats 429 as valid credentials with warning", async () => { + const originalFetch = globalThis.fetch; + + 
globalThis.fetch = async (url) => { + if (String(url).endsWith("/models")) { + throw new Error("connect ECONNREFUSED"); + } + return new Response(JSON.stringify({ error: "Rate limited" }), { status: 429 }); + }; + + try { + const result = await validateProviderApiKey({ + provider: "openai-compatible-chat-t25-rate-limit", + apiKey: "sk-test", + providerSpecificData: { + baseUrl: "https://api.example.com/v1", + validationModelId: "meta-llama/Llama-3.1-8B-Instruct", + }, + }); + + assert.equal(result.valid, true); + assert.equal(result.error, null); + assert.equal(result.method, "chat_completions"); + assert.match(result.warning, /Rate limited/i); + } finally { + globalThis.fetch = originalFetch; + } +}); diff --git a/tests/unit/t26-ai-sdk-accept-header-compat.test.mjs b/tests/unit/t26-ai-sdk-accept-header-compat.test.mjs new file mode 100644 index 00000000..9f9d44e8 --- /dev/null +++ b/tests/unit/t26-ai-sdk-accept-header-compat.test.mjs @@ -0,0 +1,30 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { clientWantsJsonResponse, resolveStreamFlag, stripMarkdownCodeFence } = + await import("../../open-sse/utils/aiSdkCompat.ts"); + +test("T26: Accept application/json disables SSE stream mode", () => { + assert.equal(clientWantsJsonResponse("application/json"), true); + assert.equal(resolveStreamFlag(true, "application/json"), false); +}); + +test("T26: text/event-stream keeps SSE behavior", () => { + assert.equal(clientWantsJsonResponse("text/event-stream"), false); + assert.equal(resolveStreamFlag(true, "text/event-stream"), true); +}); + +test("T26: mixed Accept header prefers SSE only when text/event-stream is present", () => { + assert.equal(clientWantsJsonResponse("application/json, text/event-stream"), false); + assert.equal(resolveStreamFlag(true, "application/json, text/event-stream"), true); +}); + +test("T26: markdown code fence stripping unwraps Claude JSON blocks", () => { + const wrapped = 
'```json\n{"name":"omniroute"}\n```'; + assert.equal(stripMarkdownCodeFence(wrapped), '{"name":"omniroute"}'); +}); + +test("T26: non-fenced content is returned unchanged", () => { + const plain = '{"name":"omniroute"}'; + assert.equal(stripMarkdownCodeFence(plain), plain); +}); diff --git a/tests/unit/t27-github-copilot-response-format.test.mjs b/tests/unit/t27-github-copilot-response-format.test.mjs new file mode 100644 index 00000000..61841358 --- /dev/null +++ b/tests/unit/t27-github-copilot-response-format.test.mjs @@ -0,0 +1,84 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { GithubExecutor } = await import("../../open-sse/executors/github.ts"); +const { BaseExecutor } = await import("../../open-sse/executors/base.ts"); + +function streamFromChunks(chunks) { + const encoder = new TextEncoder(); + return new ReadableStream({ + start(controller) { + for (const chunk of chunks) { + controller.enqueue(encoder.encode(chunk)); + } + controller.close(); + }, + }); +} + +test("T27: Claude + response_format=json_object injects system instruction and strips response_format field", () => { + const executor = new GithubExecutor(); + const request = { + messages: [{ role: "user", content: "return json" }], + response_format: { type: "json_object" }, + }; + + const transformed = executor.transformRequest("claude-sonnet-4.5", request, false, {}); + + assert.equal(transformed.response_format, undefined); + assert.equal(transformed.messages[0].role, "system"); + assert.match( + transformed.messages[0].content, + /Respond only with valid JSON\. 
Do not include any text/i + ); +}); + +test("T27: non-Claude models keep response_format untouched", () => { + const executor = new GithubExecutor(); + const request = { + messages: [{ role: "user", content: "hello" }], + response_format: { type: "json_object" }, + }; + + const transformed = executor.transformRequest("gpt-4o", request, false, {}); + assert.deepEqual(transformed.response_format, { type: "json_object" }); +}); + +test("T27: SSE [DONE] guard applies only in streaming mode", async () => { + const executor = new GithubExecutor(); + const originalExecute = BaseExecutor.prototype.execute; + + BaseExecutor.prototype.execute = async () => ({ + response: new Response( + streamFromChunks(['data: {"delta":"hello"}\n\n', "data: [DONE]\n\n", "data: tail\n\n"]), + { + status: 200, + headers: { "content-type": "text/event-stream" }, + } + ), + url: "https://api.githubcopilot.com/chat/completions", + }); + + try { + const streamingResult = await executor.execute({ + model: "claude-sonnet-4.5", + body: { messages: [] }, + stream: true, + credentials: { accessToken: "token" }, + }); + const streamingText = await streamingResult.response.text(); + assert.equal(streamingText.includes("data: [DONE]"), false); + assert.equal(streamingText.includes("data: tail"), true); + + const nonStreamingResult = await executor.execute({ + model: "claude-sonnet-4.5", + body: { messages: [] }, + stream: false, + credentials: { accessToken: "token" }, + }); + const nonStreamingText = await nonStreamingResult.response.text(); + assert.equal(nonStreamingText.includes("data: [DONE]"), true); + } finally { + BaseExecutor.prototype.execute = originalExecute; + } +}); diff --git a/tests/unit/t28-model-catalog-updates.test.mjs b/tests/unit/t28-model-catalog-updates.test.mjs new file mode 100644 index 00000000..22ab769a --- /dev/null +++ b/tests/unit/t28-model-catalog-updates.test.mjs @@ -0,0 +1,41 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { 
getModelInfoCore } from "../../open-sse/services/model.ts"; +import { REGISTRY } from "../../open-sse/config/providerRegistry.ts"; + +test("T28: gemini catalog includes preview models from 9router", () => { + const geminiIds = REGISTRY.gemini.models.map((m) => m.id); + const geminiCliIds = REGISTRY["gemini-cli"].models.map((m) => m.id); + + assert.ok(geminiIds.includes("gemini-3.1-flash-lite-preview")); + assert.ok(geminiIds.includes("gemini-3-flash-preview")); + assert.ok(geminiCliIds.includes("gemini-3.1-flash-lite-preview")); + assert.ok(geminiCliIds.includes("gemini-3-flash-preview")); +}); + +test("T28: vertex catalog includes partner models when vertex executor is available", () => { + const vertexIds = REGISTRY.vertex.models.map((m) => m.id); + + assert.ok(vertexIds.includes("deepseek-v3.2")); + assert.ok(vertexIds.includes("qwen3-next-80b")); + assert.ok(vertexIds.includes("glm-5")); +}); + +test("T28: new catalog models resolve through getModelInfoCore", async () => { + const minimax = await getModelInfoCore("minimax/minimax-m2.7", {}); + assert.equal(minimax.provider, "minimax"); + assert.equal(minimax.model, "minimax-m2.7"); + + const flashLite = await getModelInfoCore("gemini/gemini-3.1-flash-lite-preview", {}); + assert.equal(flashLite.provider, "gemini"); + assert.equal(flashLite.model, "gemini-3.1-flash-lite-preview"); + + const flashPreview = await getModelInfoCore("gemini/gemini-3-flash-preview", {}); + assert.equal(flashPreview.provider, "gemini"); + assert.equal(flashPreview.model, "gemini-3-flash-preview"); + + const vertexPartner = await getModelInfoCore("vertex/qwen3-next-80b", {}); + assert.equal(vertexPartner.provider, "vertex"); + assert.equal(vertexPartner.model, "qwen3-next-80b"); +}); diff --git a/tests/unit/t29-vertex-sa-json-executor.test.mjs b/tests/unit/t29-vertex-sa-json-executor.test.mjs new file mode 100644 index 00000000..47cc63ab --- /dev/null +++ b/tests/unit/t29-vertex-sa-json-executor.test.mjs @@ -0,0 +1,71 @@ +import test 
from "node:test"; +import assert from "node:assert/strict"; + +const { VertexExecutor } = await import("../../open-sse/executors/vertex.ts"); + +const MIN_SA_JSON = JSON.stringify({ + project_id: "vertex-project-123", +}); + +test("T29: Vertex executor builds regional Gemini URL from Service Account project", () => { + const executor = new VertexExecutor(); + const url = executor.buildUrl("gemini-3.1-pro-preview", true, 0, { + apiKey: MIN_SA_JSON, + providerSpecificData: { region: "europe-west4" }, + }); + + assert.equal( + url, + "https://aiplatform.googleapis.com/v1/projects/vertex-project-123/locations/europe-west4/publishers/google/models/gemini-3.1-pro-preview:streamGenerateContent?alt=sse" + ); +}); + +test("T29: Vertex executor routes partner models to global openapi endpoint", () => { + const executor = new VertexExecutor(); + const url = executor.buildUrl("deepseek-v3.2", false, 0, { + apiKey: MIN_SA_JSON, + providerSpecificData: { region: "us-central1" }, + }); + + assert.equal( + url, + "https://aiplatform.googleapis.com/v1/projects/vertex-project-123/locations/global/endpoints/openapi/chat/completions" + ); +}); + +test("T29: Vertex executor defaults region to us-central1 when not configured", () => { + const executor = new VertexExecutor(); + const url = executor.buildUrl("gemini-2.5-flash", false, 0, { + apiKey: MIN_SA_JSON, + providerSpecificData: {}, + }); + + assert.equal( + url, + "https://aiplatform.googleapis.com/v1/projects/vertex-project-123/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent" + ); +}); + +test("T29: Vertex executor headers include Bearer token and SSE Accept when streaming", () => { + const executor = new VertexExecutor(); + const headers = executor.buildHeaders({ accessToken: "ya29.test-token" }, true); + + assert.equal(headers["Content-Type"], "application/json"); + assert.equal(headers.Authorization, "Bearer ya29.test-token"); + assert.equal(headers.Accept, "text/event-stream"); +}); + 
+test("T29: Vertex executor rejects invalid Service Account JSON clearly", async () => { + const executor = new VertexExecutor(); + + await assert.rejects( + () => + executor.execute({ + model: "gemini-2.5-flash", + body: { contents: [] }, + stream: false, + credentials: { apiKey: "not-json" }, + }), + /Service Account JSON/i + ); +}); diff --git a/tests/unit/t30-kiro-400-model-unavailable.test.mjs b/tests/unit/t30-kiro-400-model-unavailable.test.mjs new file mode 100644 index 00000000..59237164 --- /dev/null +++ b/tests/unit/t30-kiro-400-model-unavailable.test.mjs @@ -0,0 +1,29 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { isModelUnavailableError, getNextFamilyFallback } = + await import("../../open-sse/services/modelFamilyFallback.ts"); + +test("T30: Kiro 'improperly formed request' 400 is treated as model-unavailable", () => { + const unavailable = isModelUnavailableError( + 400, + "Bad Request: improperly formed request for selected model" + ); + assert.equal(unavailable, true); +}); + +test("T30: generic 400 without model-unavailable signal is not treated as unavailable", () => { + const unavailable = isModelUnavailableError(400, "Bad Request: malformed JSON body"); + assert.equal(unavailable, false); +}); + +test("T30: 404 still maps to model-unavailable", () => { + const unavailable = isModelUnavailableError(404, "not found"); + assert.equal(unavailable, true); +}); + +test("T30: model family helper returns a sibling candidate when available", () => { + const next = getNextFamilyFallback("gemini-3.1-pro-high", new Set(["gemini-3.1-pro-high"])); + assert.equal(typeof next, "string"); + assert.notEqual(next, "gemini-3.1-pro-high"); +}); diff --git a/tests/unit/t31-t33-t34-t38-model-specs.test.mjs b/tests/unit/t31-t33-t34-t38-model-specs.test.mjs new file mode 100644 index 00000000..8a2257bc --- /dev/null +++ b/tests/unit/t31-t33-t34-t38-model-specs.test.mjs @@ -0,0 +1,53 @@ +import test from "node:test"; +import assert 
from "node:assert/strict"; + +const { REGISTRY } = await import("../../open-sse/config/providerRegistry.ts"); +const { resolveModelAlias: resolveDeprecatedAlias } = + await import("../../open-sse/services/modelDeprecation.ts"); +const { normalizeThinkingLevel } = await import("../../open-sse/services/thinkingBudget.ts"); +const { + MODEL_SPECS, + getModelSpec, + capMaxOutputTokens, + resolveModelAlias, + getDefaultThinkingBudget, + capThinkingBudget, +} = await import("../../src/shared/constants/modelSpecs.ts"); + +test("T31: registry exposes Gemini 3.1 Pro High/Low model IDs", () => { + const geminiIds = REGISTRY.gemini.models.map((m) => m.id); + assert.ok(geminiIds.includes("gemini-3.1-pro-high")); + assert.ok(geminiIds.includes("gemini-3.1-pro-low")); +}); + +test("T31: legacy Gemini aliases resolve to Gemini 3.1 IDs", () => { + assert.equal(resolveDeprecatedAlias("gemini-3-pro-high"), "gemini-3.1-pro-high"); + assert.equal(resolveDeprecatedAlias("gemini-3-pro-low"), "gemini-3.1-pro-low"); +}); + +test("T33: thinkingLevel string is converted into numeric thinkingBudget", () => { + const converted = normalizeThinkingLevel({ + model: "gemini-3.1-pro-high", + generationConfig: { + thinkingConfig: { thinkingLevel: "HIGH" }, + }, + }); + + assert.equal(converted.generationConfig.thinkingConfig.thinkingBudget, 24576); + assert.equal(converted.generationConfig.thinkingConfig.thinkingLevel, undefined); +}); + +test("T34: max output tokens are capped by model spec", () => { + assert.equal(capMaxOutputTokens("gemini-3-flash", 131072), 65536); + assert.equal(capMaxOutputTokens("gemini-3-flash"), 65536); + assert.equal(capMaxOutputTokens("gemini-3.1-pro-high", 131072), 131072); +}); + +test("T38: modelSpecs exposes centralized helpers with alias and prefix lookup", () => { + assert.equal(typeof MODEL_SPECS["gemini-3.1-pro-high"], "object"); + assert.equal(getModelSpec("gemini-3-pro-high").maxOutputTokens, 131072); + 
assert.equal(getModelSpec("gemini-3-flash-preview").maxOutputTokens, 65536); + assert.equal(resolveModelAlias("gemini-3-pro-low"), "gemini-3.1-pro-low"); + assert.equal(getDefaultThinkingBudget("gemini-3.1-pro-high"), 24576); + assert.equal(capThinkingBudget("gemini-3.1-pro-low", 50000), 16000); +}); diff --git a/tests/unit/t40-opencode-cli-tools-integration.test.mjs b/tests/unit/t40-opencode-cli-tools-integration.test.mjs new file mode 100644 index 00000000..dd903beb --- /dev/null +++ b/tests/unit/t40-opencode-cli-tools-integration.test.mjs @@ -0,0 +1,67 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; + +const { CLI_TOOLS } = await import("../../src/shared/constants/cliTools.ts"); +const { resolveOpencodeConfigPath } = await import("../../src/shared/services/cliRuntime.ts"); +const { buildOpenCodeProviderConfig, mergeOpenCodeConfig } = + await import("../../src/shared/services/opencodeConfig.ts"); + +test("T40: OpenCode card documents config paths and --variant usage", () => { + const opencode = CLI_TOOLS.opencode; + assert.ok(opencode, "OpenCode tool card must exist"); + + const notesText = (opencode.notes || []) + .map((note) => note?.text || "") + .join(" ") + .toLowerCase(); + + assert.match(notesText, /\.config\/opencode\/opencode\.json/); + assert.match(notesText, /%appdata%/); + assert.match(notesText, /--variant/); +}); + +test("T40: OpenCode config path resolves per-platform", () => { + const linuxWithXdg = resolveOpencodeConfigPath( + "linux", + { XDG_CONFIG_HOME: "/tmp/xdg-config-home" }, + "/home/dev" + ); + assert.equal(linuxWithXdg, path.join("/tmp/xdg-config-home", "opencode", "opencode.json")); + + const linuxDefault = resolveOpencodeConfigPath("linux", {}, "/home/dev"); + assert.equal(linuxDefault, path.join("/home/dev", ".config", "opencode", "opencode.json")); + + const windowsPath = resolveOpencodeConfigPath( + "win32", + { APPDATA: "C:\\Users\\dev\\AppData\\Roaming" }, + 
"C:\\Users\\dev" + ); + assert.equal( + windowsPath, + path.join("C:\\Users\\dev\\AppData\\Roaming", "opencode", "opencode.json") + ); +}); + +test("T40: OpenCode config generator includes endpoint and selected API key", () => { + const providerConfig = buildOpenCodeProviderConfig({ + baseUrl: "http://localhost:20128/v1/", + apiKey: "sk_test_opencode", + model: "claude-sonnet-4-5-thinking", + }); + assert.equal(providerConfig.baseURL, "http://localhost:20128/v1"); + assert.equal(providerConfig.apiKey, "sk_test_opencode"); + assert.ok(providerConfig.models.includes("claude-sonnet-4-5-thinking")); + + const mergedConfig = mergeOpenCodeConfig( + { providers: { custom: { name: "Custom Provider" } } }, + { + baseUrl: "http://localhost:20128/v1", + apiKey: "sk_test_opencode", + model: "claude-sonnet-4-5-thinking", + } + ); + assert.ok(mergedConfig.providers.custom); + assert.equal(mergedConfig.providers.omniroute.baseURL, "http://localhost:20128/v1"); + assert.equal(mergedConfig.providers.omniroute.apiKey, "sk_test_opencode"); +}); diff --git a/tests/unit/t42-image-size-to-aspect-ratio.test.mjs b/tests/unit/t42-image-size-to-aspect-ratio.test.mjs new file mode 100644 index 00000000..4755a53c --- /dev/null +++ b/tests/unit/t42-image-size-to-aspect-ratio.test.mjs @@ -0,0 +1,96 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +const { mapImageSize } = await import("../../open-sse/translator/image/sizeMapper.ts"); +const { handleImageGeneration } = await import("../../open-sse/handlers/imageGeneration.ts"); +const { IMAGE_PROVIDERS } = await import("../../open-sse/config/imageRegistry.ts"); + +test("T42: size mapper converts OpenAI sizes and preserves direct aspect ratios", () => { + assert.equal(mapImageSize("1024x1024"), "1:1"); + assert.equal(mapImageSize("1792x1024"), "16:9"); + assert.equal(mapImageSize("16:9"), "16:9"); + assert.equal(mapImageSize("333x777"), "1:1"); + assert.equal(mapImageSize(undefined), "1:1"); +}); + +test("T42: Imagen3 
requests send mapped aspect_ratio and normalize to OpenAI response shape", async () => { + const testProviderId = "t42-imagen3"; + const originalProvider = IMAGE_PROVIDERS[testProviderId]; + const originalFetch = globalThis.fetch; + let capturedRequestBody = null; + + IMAGE_PROVIDERS[testProviderId] = { + id: testProviderId, + baseUrl: "https://example.com/imagen3", + authType: "apikey", + authHeader: "bearer", + format: "imagen3", + models: [{ id: "test-model", name: "Test Imagen3" }], + supportedSizes: ["1024x1024", "1792x1024", "16:9"], + }; + + globalThis.fetch = async (_url, options = {}) => { + capturedRequestBody = JSON.parse(String(options.body || "{}")); + return new Response( + JSON.stringify({ + images: [{ image: "ZmFrZS1pbWFnZS1iYXNlNjQ=" }], + }), + { + status: 200, + headers: { "content-type": "application/json" }, + } + ); + }; + + try { + const resultLandscape = await handleImageGeneration({ + body: { + model: `${testProviderId}/test-model`, + prompt: "a mountain at sunrise", + size: "1792x1024", + n: 1, + }, + credentials: { apiKey: "test-key" }, + log: { info: () => {}, error: () => {} }, + }); + + assert.equal(capturedRequestBody.aspect_ratio, "16:9"); + assert.equal(resultLandscape.success, true); + assert.ok(Number.isFinite(resultLandscape.data.created)); + assert.ok(Array.isArray(resultLandscape.data.data)); + assert.equal(resultLandscape.data.data[0].b64_json, "ZmFrZS1pbWFnZS1iYXNlNjQ="); + + const resultDirectRatio = await handleImageGeneration({ + body: { + model: `${testProviderId}/test-model`, + prompt: "portrait photo", + size: "16:9", + n: 1, + }, + credentials: { apiKey: "test-key" }, + log: { info: () => {}, error: () => {} }, + }); + assert.equal(capturedRequestBody.aspect_ratio, "16:9"); + assert.equal(resultDirectRatio.success, true); + + const resultFallback = await handleImageGeneration({ + body: { + model: `${testProviderId}/test-model`, + prompt: "abstract art", + size: "333x777", + n: 1, + }, + credentials: { apiKey: 
"test-key" }, + log: { info: () => {}, error: () => {} }, + }); + assert.equal(capturedRequestBody.aspect_ratio, "1:1"); + assert.equal(resultFallback.success, true); + } finally { + globalThis.fetch = originalFetch; + if (originalProvider) { + IMAGE_PROVIDERS[testProviderId] = originalProvider; + } else { + delete IMAGE_PROVIDERS[testProviderId]; + } + } +}); diff --git a/tests/unit/thinking-budget.test.mjs b/tests/unit/thinking-budget.test.mjs index 68c2ce73..ee7cdcfa 100644 --- a/tests/unit/thinking-budget.test.mjs +++ b/tests/unit/thinking-budget.test.mjs @@ -169,9 +169,9 @@ test("EFFORT_BUDGETS has expected keys", () => { test("THINKING_LEVEL_MAP has all expected levels", () => { assert.equal(THINKING_LEVEL_MAP.none, 0); - assert.equal(THINKING_LEVEL_MAP.low, 1024); - assert.equal(THINKING_LEVEL_MAP.medium, 10240); - assert.equal(THINKING_LEVEL_MAP.high, 131072); + assert.equal(THINKING_LEVEL_MAP.low, 4096); + assert.equal(THINKING_LEVEL_MAP.medium, 8192); + assert.equal(THINKING_LEVEL_MAP.high, 24576); }); test("normalizeThinkingLevel: converts thinkingLevel 'high' to budget", () => { @@ -182,7 +182,7 @@ test("normalizeThinkingLevel: converts thinkingLevel 'high' to budget", () => { }; const result = normalizeThinkingLevel(body); assert.equal(result.thinking.type, "enabled"); - assert.equal(result.thinking.budget_tokens, 131072); + assert.equal(result.thinking.budget_tokens, 24576); assert.equal(result.thinkingLevel, undefined); }); @@ -194,7 +194,7 @@ test("normalizeThinkingLevel: converts thinking_level 'low' to budget", () => { }; const result = normalizeThinkingLevel(body); assert.equal(result.thinking.type, "enabled"); - assert.equal(result.thinking.budget_tokens, 1024); + assert.equal(result.thinking.budget_tokens, 4096); assert.equal(result.thinking_level, undefined); }); @@ -213,7 +213,7 @@ test("normalizeThinkingLevel: converts Gemini thinkingConfig.thinkingLevel", () }, }; const result = normalizeThinkingLevel(body); - 
assert.equal(result.generationConfig.thinkingConfig.thinkingBudget, 131072); + assert.equal(result.generationConfig.thinkingConfig.thinkingBudget, 24576); assert.equal(result.generationConfig.thinking_config, undefined); }); @@ -269,7 +269,7 @@ test("applyThinkingBudget: thinkingLevel 'high' + PASSTHROUGH = converts and pas messages: [{ role: "user", content: "hello" }], }; const result = applyThinkingBudget(body); - assert.equal(result.thinking.budget_tokens, 131072); + assert.equal(result.thinking.budget_tokens, 24576); assert.equal(result.thinkingLevel, undefined); setThinkingBudgetConfig(DEFAULT_THINKING_CONFIG); });