feat(chatgpt-web): image generation + edit (Open WebUI compatible) (#1607)

This commit is contained in: release/v3.7.0
Payne 2026-04-26 05:51:37 +03:00 committed by GitHub
parent 13495d4d13
commit 8a8fcc77a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 3187 additions and 80 deletions

View file

@@ -195,6 +195,24 @@ CLOUD_URL=
# Default: http://localhost:20128
NEXT_PUBLIC_BASE_URL=http://localhost:20128
# Browser-facing OmniRoute origin for generated assets in API responses.
# Used by: chatgpt-web image generation cache URLs (/v1/chatgpt-web/image/<id>).
# Set this when Open WebUI or another relay reaches OmniRoute by an internal URL
# but the user's browser must fetch images from a LAN, tunnel, or public origin.
# Do not include /v1; if included accidentally it will be normalized away.
# OMNIROUTE_PUBLIC_BASE_URL=http://192.168.0.15:20128
# Max wait time for an async chatgpt-web image to land via the celsius
# WebSocket, in milliseconds. Default 180000 (3 minutes). Increase it when
# the upstream queue is deep ("Lots of people are creating images right now").
# OMNIROUTE_CGPT_WEB_IMAGE_TIMEOUT_MS=180000
# Total in-memory byte budget for the chatgpt-web image cache (used to serve
# /v1/chatgpt-web/image/<id>), in megabytes. Default 256. Lower this if you
# run OmniRoute on a memory-constrained host; raise it if image generation
# is heavy and clients are racing the 30-minute TTL.
# OMNIROUTE_CGPT_WEB_IMAGE_CACHE_MAX_MB=256
# Public cloud URL — client-side mirror of CLOUD_URL.
NEXT_PUBLIC_CLOUD_URL=
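
As an illustration of the /v1 normalization noted above, a minimal sketch (the helper name is hypothetical; the actual logic lives in the image handler):

// Hypothetical sketch: strip trailing slashes and a trailing /v1 segment so
// generated asset URLs never gain a doubled /v1.
function normalizePublicBaseUrl(raw: string): string {
  return raw.replace(/\/+$/, "").replace(/\/v1$/i, "");
}
// normalizePublicBaseUrl("http://192.168.0.15:20128/v1") -> "http://192.168.0.15:20128"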
@@ -261,6 +279,7 @@ NEXT_PUBLIC_ENABLE_SOCKS5_PROXY=true
# Used by MCP server, A2A skills, and CLI sidecars to call the running instance.
# Explicit base URL for MCP/A2A tools to reach OmniRoute (overrides localhost auto-detect).
# For browser-visible generated image URLs, prefer OMNIROUTE_PUBLIC_BASE_URL above.
# Used by: open-sse/mcp-server/server.ts, src/lib/a2a/
# OMNIROUTE_BASE_URL=http://localhost:20128

View file

@@ -18,6 +18,20 @@ const nextConfig = {
},
},
output: "standalone",
// OmniRoute is a proxy for AI APIs — request bodies routinely include
// multi-MB payloads (vision models, image edits, base64-encoded files,
// long chat histories with embedded images). Next.js's Server Action
// handler intercepts POSTs with multipart/form-data or
// x-www-form-urlencoded content-types and enforces a 1 MB cap that
// surfaces as a 413 with a confusing "Server Actions" hint, even on
// pure route handlers. 50 MB matches what most upstream LLM providers
// accept for image-bearing requests; tune via env if a deployment needs
// more.
experimental: {
serverActions: {
bodySizeLimit: process.env.OMNIROUTE_SERVER_ACTIONS_BODY_LIMIT || "50mb",
},
},
outputFileTracingRoot: projectRoot,
outputFileTracingExcludes: {
// Planning/task docs are not runtime assets and can break standalone copies

View file

@@ -136,6 +136,17 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderConfig> = {
supportedSizes: ["512x512", "1024x1024", "1024x1536", "1536x1024"],
},
"chatgpt-web": {
id: "chatgpt-web",
alias: "cgpt-web",
baseUrl: "https://chatgpt.com/backend-api/f/conversation",
authType: "apikey",
authHeader: "cookie",
format: "chatgpt-web",
models: [{ id: "gpt-5.3-instant", name: "GPT-5.3 Instant (ChatGPT Web Image)" }],
supportedSizes: ["1024x1024", "1024x1536", "1536x1024"],
},
xai: {
id: "xai",
baseUrl: "https://api.x.ai/v1/images/generations",

File diff suppressed because it is too large

View file

@@ -19,6 +19,12 @@ import { randomUUID } from "crypto";
import { getImageProvider, parseImageModel } from "../config/imageRegistry.ts";
import { mapImageSize } from "../translator/image/sizeMapper.ts";
import { getCodexClientVersion, getCodexUserAgent } from "../config/codexClient.ts";
import { ChatGptWebExecutor } from "../executors/chatgpt-web.ts";
import {
getChatGptImage,
findChatGptImageBySha256,
} from "../services/chatgptImageCache.ts";
import { createHash } from "node:crypto";
import { saveCallLog } from "@/lib/usageDb";
import {
submitComfyWorkflow,
@@ -113,7 +119,14 @@ const FAL_PRESET_SIZES = {
* @param {object} options.log - Logger
* @param {string} [options.resolvedProvider] - Pre-resolved provider ID (from route layer custom model resolution)
*/
export async function handleImageGeneration({ body, credentials, log, resolvedProvider = null }) {
export async function handleImageGeneration({
body,
credentials,
log,
resolvedProvider = null,
signal = null,
clientHeaders = null,
}) {
let provider, model;
if (resolvedProvider) {
@@ -257,6 +270,18 @@ export async function handleImageGeneration({ body, credentials, log, resolvedProvider = null }) {
});
}
if (providerConfig.format === "chatgpt-web") {
return handleChatGptWebImageGeneration({
model,
provider,
body,
credentials,
log,
signal,
clientHeaders,
});
}
if (providerConfig.format === "nanobanana") {
return handleNanoBananaImageGeneration({
model,
@@ -537,6 +562,370 @@ async function handleOpenAIImageGeneration({
return result;
}
const CHATGPT_WEB_IMAGE_MARKDOWN_RE = /!\[[^\]]*\]\(([^)\s]+)\)/g;
const CHATGPT_WEB_IMAGE_ID_RE = /\/v1\/chatgpt-web\/image\/([a-f0-9]{16,64})(?=[?\s"'<>)]|$)/i;
function extractMarkdownImageUrls(text: string): string[] {
const urls: string[] = [];
// String.prototype.matchAll operates on a fresh clone of the regex, so the
// shared global regex's lastIndex is never advanced and no manual reset is
// needed between calls.
for (const match of text.matchAll(CHATGPT_WEB_IMAGE_MARKDOWN_RE)) {
if (match[1]) urls.push(match[1]);
}
return urls;
}
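
For intuition, here is what the two regexes above pull out of a completed chat turn (the id is made up):

// Illustration only: content mimics the markdown delta the executor emits.
const content =
  "Here you go:\n![image](http://internal/v1/chatgpt-web/image/0123456789abcdef0123456789abcdef)";
const urls = extractMarkdownImageUrls(content);
// urls[0] === "http://internal/v1/chatgpt-web/image/0123456789abcdef0123456789abcdef"
const id = urls[0]?.match(CHATGPT_WEB_IMAGE_ID_RE)?.[1];
// id === "0123456789abcdef0123456789abcdef", the key getChatGptImage() expects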
function buildChatGptWebImagePrompt(body): string {
const prompt = String(body.prompt || "").trim();
const details: string[] = [`Create an image for this prompt: ${prompt}`];
if (typeof body.size === "string" && body.size.trim()) {
details.push(`Requested size: ${body.size.trim()}.`);
}
if (typeof body.quality === "string" && body.quality.trim()) {
details.push(`Requested quality: ${body.quality.trim()}.`);
}
if (typeof body.style === "string" && body.style.trim()) {
details.push(`Requested style: ${body.style.trim()}.`);
}
return details.join("\n");
}
async function handleChatGptWebImageGeneration({
model,
provider,
body,
credentials,
log,
signal,
clientHeaders,
}) {
const startTime = Date.now();
const prompt = typeof body.prompt === "string" ? body.prompt.trim() : "";
if (!prompt) {
return saveImageErrorResult({
provider,
model,
status: 400,
startTime,
error: "Prompt is required for ChatGPT Web image generation",
});
}
if (!credentials?.apiKey) {
return saveImageErrorResult({
provider,
model,
status: 401,
startTime,
error: "ChatGPT Web credentials missing session cookie",
});
}
// Each image is one chatgpt.com chat turn (~30s). Cap at 4 (in line with
// OpenAI's own n limits for gpt-image-1 / dall-e-3) so a stray n=1000 doesn't pin the
// executor for hours before the upstream HTTP timeout fires.
const CHATGPT_WEB_IMAGE_N_MAX = 4;
const rawCount =
Number.isInteger(body.n) && (body.n as number) > 0 ? (body.n as number) : 1;
if (rawCount > CHATGPT_WEB_IMAGE_N_MAX) {
return saveImageErrorResult({
provider,
model,
status: 400,
startTime,
error: `ChatGPT Web image generation supports n=1..${CHATGPT_WEB_IMAGE_N_MAX} (got ${rawCount}); each n is a separate ~30s chat turn.`,
});
}
const requestedCount = rawCount;
if (log && requestedCount > 1) {
log.warn(
"IMAGE",
`ChatGPT Web returns one image per chat turn; requested n=${requestedCount} will run sequentially`
);
}
const wantsBase64 = body.response_format === "b64_json";
const images: Array<{ url?: string; b64_json?: string }> = [];
const requestBody = {
model,
prompt: prompt.slice(0, 500),
size: body.size || undefined,
quality: body.quality || undefined,
};
for (let i = 0; i < requestedCount; i++) {
const executor = new ChatGptWebExecutor();
const result = await executor.execute({
model,
body: {
messages: [{ role: "user", content: buildChatGptWebImagePrompt(body) }],
},
stream: false,
credentials,
signal,
log,
clientHeaders,
});
const responseText = await result.response.text();
if (result.response.status >= 400) {
return saveImageErrorResult({
provider,
model,
status: result.response.status,
startTime,
error: responseText,
requestBody,
});
}
let content = "";
try {
const json = JSON.parse(responseText);
content = String(json?.choices?.[0]?.message?.content || "");
} catch {
content = responseText;
}
const urls = extractMarkdownImageUrls(content);
if (urls.length === 0) {
return saveImageErrorResult({
provider,
model,
status: 502,
startTime,
error: `ChatGPT Web completed without returning image markdown: ${content.slice(0, 300)}`,
requestBody,
});
}
for (const url of urls) {
if (!wantsBase64) {
images.push({ url });
continue;
}
const id = url.match(CHATGPT_WEB_IMAGE_ID_RE)?.[1];
const cached = id ? getChatGptImage(id) : null;
if (!cached) {
return saveImageErrorResult({
provider,
model,
status: 502,
startTime,
error: "ChatGPT Web image bytes expired before b64_json conversion",
requestBody,
});
}
images.push({ b64_json: cached.bytes.toString("base64") });
}
}
return saveImageSuccessResult({
provider,
model,
startTime,
requestBody,
responseBody: { images_count: images.length },
images,
});
}
/**
* Handle a multipart /v1/images/edits request for chatgpt-web. Open WebUI
* uploads the prior image's bytes; we hash them and look up our cache.
*
* The hash match is reliable because Open WebUI's image-gen pipeline
* downloads our /v1/chatgpt-web/image/<id> URL byte-for-byte and re-serves
* those exact bytes through its own file store. When the user asks to edit
* the image, OWUI uploads the same bytes back to us via multipart: same
* hash, so we find the conversation context and drive the executor with a
* synthetic chat thread that triggers continuation mode.
*
* No-match cases (cache evicted by TTL, or the user uploaded a foreign
* image) get a clear 400. We can't actually edit an image we don't have a
* conversation context for: chatgpt.com's image_gen tool needs the
* original conversation node, and we don't have a path to upload bytes
* directly.
*/
export async function handleImageEdit({
provider,
model,
body,
imageBytes,
credentials,
log,
signal = null,
clientHeaders = null,
}: {
provider: string;
model: string;
body: Record<string, any>;
imageBytes: Buffer;
imageMime?: string; // accepted for symmetry with route layer; not used
credentials: any;
log: any;
signal?: AbortSignal | null;
clientHeaders?: Record<string, string> | null;
}) {
const startTime = Date.now();
const prompt = typeof body.prompt === "string" ? body.prompt.trim() : "";
if (!prompt) {
return saveImageErrorResult({
provider,
model,
status: 400,
startTime,
error: "Prompt is required for image edit",
});
}
if (!credentials?.apiKey) {
return saveImageErrorResult({
provider,
model,
status: 401,
startTime,
error: "ChatGPT Web credentials missing session cookie",
});
}
const imageHash = createHash("sha256").update(imageBytes).digest("hex");
const cached = findChatGptImageBySha256(imageHash);
const wantsBase64 = body.response_format === "b64_json";
const requestBody = {
model,
prompt: prompt.slice(0, 500),
size: body.size || undefined,
image_hash: imageHash.slice(0, 16),
image_bytes: imageBytes.length,
cached_match: Boolean(cached?.entry.context),
};
if (!cached?.entry.context) {
// chatgpt-web's image_gen tool can only edit an image when we continue
// the original conversation node. If we never generated this image (or
// its 30-minute TTL elapsed), there's no node to continue. Return a
// clear, actionable error — much better than silently spawning an
// unrelated image and confusing the user.
log?.warn?.(
"IMAGE",
`chatgpt-web edit: no cached match for sha256=${imageHash.slice(0, 16)} (bytes=${imageBytes.length}); returning 400`
);
return saveImageErrorResult({
provider,
model,
status: 400,
startTime,
error:
"chatgpt-web image edit only works for images recently generated through this OmniRoute instance " +
"(cache window: 30 minutes). Re-generate the image and try the edit immediately, or disable image-edit " +
"in your client to use plain chat-completion edit prompts instead.",
requestBody,
});
}
// Build a synthetic chat thread that surfaces the cached image URL on
// the assistant turn. The executor's parseOpenAIMessages picks up the
// URL, findCachedImageContext resolves it to {conversationId,
// parentMessageId}, and looksLikeImageEditRequest fires on the user
// prompt — together producing a continuation request that actually
// edits the saved image.
//
// The synthetic user prompt is anchored with both an edit verb AND an
// image-gen verb so the executor's heuristics fire regardless of what
// wording the caller used ("now make it brighter", "tweak this", ...):
// - looksLikeImageEditRequest: matches "edit" + "image" within 120 chars
// - looksLikeImageGenRequest: matches "generate" + "image" within 40 chars
// Either match alone would set forImageGen, but covering both is cheap
// insurance for prompts that don't fit common phrasings.
const messages: Array<{ role: string; content: string }> = [
{
role: "assistant",
// The base URL is irrelevant — only the path is parsed by
// CACHED_IMAGE_URL_RE in the executor's findCachedImageContext.
content: `![image](http://internal/v1/chatgpt-web/image/${cached.id})`,
},
{
role: "user",
content: `Edit the image and generate the new image: ${prompt}`,
},
];
const executor = new ChatGptWebExecutor();
const result = await executor.execute({
model,
body: { messages },
stream: false,
credentials,
signal,
log,
clientHeaders,
});
const responseText = await result.response.text();
if (result.response.status >= 400) {
return saveImageErrorResult({
provider,
model,
status: result.response.status,
startTime,
error: responseText,
requestBody,
});
}
let content = "";
try {
const json = JSON.parse(responseText);
content = String(json?.choices?.[0]?.message?.content || "");
} catch {
content = responseText;
}
const urls = extractMarkdownImageUrls(content);
if (urls.length === 0) {
return saveImageErrorResult({
provider,
model,
status: 502,
startTime,
error: `ChatGPT Web edit completed without returning image markdown: ${content.slice(0, 300)}`,
requestBody,
});
}
const images: Array<{ url?: string; b64_json?: string }> = [];
for (const url of urls) {
if (!wantsBase64) {
images.push({ url });
continue;
}
const id = url.match(CHATGPT_WEB_IMAGE_ID_RE)?.[1];
const cachedNew = id ? getChatGptImage(id) : null;
if (!cachedNew) {
return saveImageErrorResult({
provider,
model,
status: 502,
startTime,
error: "ChatGPT Web image bytes expired before b64_json conversion",
requestBody,
});
}
images.push({ b64_json: cachedNew.bytes.toString("base64") });
}
return saveImageSuccessResult({
provider,
model,
startTime,
requestBody,
responseBody: { images_count: images.length, edit_match: Boolean(cached?.entry.context) },
images,
});
}
async function handleFalAIImageGeneration({
model,
provider,

View file

@@ -0,0 +1,148 @@
/**
* In-memory cache for ChatGPT-generated images so we can serve them via a
* regular HTTP URL instead of inlining megabytes of base64 into SSE deltas.
*
* Why: chatgpt.com's `image_asset_pointer` resolves to a session-signed
* `estuary/content` URL that 403s for any anonymous client. We have to
* download the bytes server-side (with the user's session) and re-serve
* them. Streaming the raw base64 back through SSE works but Open WebUI's
* progressive markdown renderer displays each chunk as text mid-stream:
* the user sees ~3 MB of base64 scroll past before the final `)` arrives
* and the renderer recognizes it as an image. Hosting the image on a
* regular URL avoids that entirely: we emit a tiny `![image](http://...)`
* markdown delta and the browser fetches the image normally.
*
* The cache is in-memory only, with a short TTL: these URLs are single-use
* artifacts of one chat turn, not persistent assets. If the user reloads
* the conversation in a few hours the URLs will 404; that's expected.
*/
import { createHash, randomUUID } from "node:crypto";
interface CachedImage {
bytes: Buffer;
mime: string;
expiresAt: number;
context?: ChatGptImageConversationContext;
/** sha256(bytes) used by /v1/images/edits to correlate an uploaded
* image (Open WebUI re-uploads the bytes via multipart) back to the
* conversation context we cached when the image was first generated. */
bytesSha256: string;
}
const cache = new Map<string, CachedImage>();
let cacheBytes = 0;
const DEFAULT_TTL_MS = 30 * 60 * 1000;
const MAX_ENTRIES = 200;
// Per-entry images cap at 8 MB (enforced upstream in the executor), so the
// 256 MB default covers ~32 large images. The byte cap matters more than the
// entry count: a hot
// loop of 8 MB images would otherwise pin 1.6 GB of RSS before count
// eviction kicked in. Tune via OMNIROUTE_CGPT_WEB_IMAGE_CACHE_MAX_MB.
const DEFAULT_MAX_BYTES = 256 * 1024 * 1024;
function configuredMaxBytes(): number {
const raw = Number(process.env.OMNIROUTE_CGPT_WEB_IMAGE_CACHE_MAX_MB);
if (!Number.isFinite(raw) || raw <= 0) return DEFAULT_MAX_BYTES;
return Math.floor(raw * 1024 * 1024);
}
export interface ChatGptImageConversationContext {
conversationId: string;
parentMessageId: string;
}
function deleteEntry(id: string): void {
const entry = cache.get(id);
if (!entry) return;
cacheBytes -= entry.bytes.length;
cache.delete(id);
}
function evictExpired(now = Date.now()): void {
for (const [id, entry] of cache) {
if (now >= entry.expiresAt) deleteEntry(id);
}
}
function evictUntilWithinLimits(maxBytes: number, incomingBytes: number): void {
// Drop oldest until both the entry-count and total-byte caps are satisfied.
// Map iteration is insertion-ordered so the first key is the oldest entry.
while (
(cache.size >= MAX_ENTRIES || cacheBytes + incomingBytes > maxBytes) &&
cache.size > 0
) {
const firstKey = cache.keys().next().value;
if (!firstKey) break;
deleteEntry(firstKey);
}
}
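For concreteness: with the 256 MB default and 252 MB already resident, storing one more 8 MB image evicts oldest entries until 252 + 8 <= 256 holds (here a single 8 MB eviction suffices), while the 200-entry count cap triggers independently of bytes.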
export function storeChatGptImage(
bytes: Buffer,
mime: string,
ttlMs = DEFAULT_TTL_MS,
context?: ChatGptImageConversationContext
): string {
evictExpired();
evictUntilWithinLimits(configuredMaxBytes(), bytes.length);
const id = randomUUID().replace(/-/g, "");
const bytesSha256 = createHash("sha256").update(bytes).digest("hex");
cache.set(id, {
bytes,
mime,
expiresAt: Date.now() + ttlMs,
context,
bytesSha256,
});
cacheBytes += bytes.length;
return id;
}
export function getChatGptImage(id: string): CachedImage | null {
evictExpired();
const entry = cache.get(id);
if (!entry) return null;
if (Date.now() >= entry.expiresAt) {
deleteEntry(id);
return null;
}
return entry;
}
export function getChatGptImageConversationContext(
id: string
): ChatGptImageConversationContext | null {
return getChatGptImage(id)?.context ?? null;
}
/**
* Look up a cached entry by sha256(bytes). Used by /v1/images/edits to
* correlate Open WebUI's re-uploaded image back to the conversation
* context we cached at generation time, so the executor can continue the
* saved chatgpt.com conversation node and actually edit the image instead
* of generating an unrelated one from scratch.
*/
export function findChatGptImageBySha256(
hash: string
): { id: string; entry: CachedImage } | null {
evictExpired();
const target = hash.toLowerCase();
for (const [id, entry] of cache.entries()) {
if (entry.bytesSha256 === target) {
if (Date.now() < entry.expiresAt) return { id, entry };
deleteEntry(id);
}
}
return null;
}
/** Test-only: clear the cache between tests. */
export function __resetChatGptImageCacheForTesting(): void {
cache.clear();
cacheBytes = 0;
}
/** Test-only: peek at current resident-byte total. */
export function __getChatGptImageCacheBytesForTesting(): number {
return cacheBytes;
}
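
A test-style sketch of the round-trip the handlers rely on, using only the exports above (ids and bytes are made up):

import { createHash } from "node:crypto";

// Generation path: store the bytes plus the conversation context.
const bytes = Buffer.from("fake-png-bytes");
const id = storeChatGptImage(bytes, "image/png", 30 * 60 * 1000, {
  conversationId: "conv-123",   // made-up values
  parentMessageId: "msg-456",
});

// Image route: serve the bytes back by id until the TTL lapses.
const hit = getChatGptImage(id); // { bytes, mime: "image/png", ... } or null

// Edit route: correlate a re-uploaded copy by content hash.
const sha = createHash("sha256").update(bytes).digest("hex");
const match = findChatGptImageBySha256(sha); // { id, entry } while unexpired, else null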

View file

@@ -110,6 +110,14 @@ export interface TlsFetchOptions {
stream?: boolean;
/** EOF marker the upstream sends to signal end of stream (default: "[DONE]"). */
streamEofSymbol?: string;
/**
* If true, instructs the underlying tls-client to return the response body
* as a base64 `data:<mime>;base64,...` string (so binary payloads survive
* the JSON marshalling step). Required for image / binary downloads;
* without it, raw bytes get UTF-8-decoded and any non-ASCII byte is
* mangled. Default false (text mode).
*/
byteResponse?: boolean;
}
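
Given that contract, turning the marshalled body back into raw bytes is mechanical; a hedged sketch (the helper is ours, not this diff's):

// Decode the `data:<mime>;base64,<payload>` string byteResponse promises
// back into its MIME type and raw bytes.
function decodeDataUrl(dataUrl: string): { mime: string; bytes: Buffer } {
  const match = dataUrl.match(/^data:([^;,]+);base64,(.*)$/s);
  if (!match) throw new Error("not a base64 data: URL");
  return { mime: match[1], bytes: Buffer.from(match[2], "base64") };
}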
export interface TlsFetchResult {
@@ -161,6 +169,7 @@ export async function tlsFetchChatGpt(
timeoutMilliseconds: options.timeoutMs ?? DEFAULT_TIMEOUT_MS,
followRedirects: true,
withRandomTLSExtensionOrder: true,
isByteResponse: options.byteResponse === true,
};
if (options.stream) {

View file

@@ -31,6 +31,19 @@ export async function OPTIONS() {
export async function POST(request) {
await ensureInitialized();
// One-line marker for diagnosing 413 / Server-Action interceptions.
// Logs only when Content-Length is present and exceeds 256 KB, so debug
// noise stays low for typical chat payloads. Toggle off via
// OMNIROUTE_LOG_REQUEST_SHAPE=0.
if (process.env.OMNIROUTE_LOG_REQUEST_SHAPE !== "0") {
const ct = request.headers.get("content-type") ?? "";
const cl = request.headers.get("content-length");
if (cl && Number(cl) > 256 * 1024) {
console.error(
`[CHAT-ROUTE] large body content-type="${ct}" content-length=${cl}`
);
}
}
// Prompt injection guard — inspect body before forwarding
try {
const cloned = request.clone();

View file

@@ -0,0 +1,42 @@
import { CORS_HEADERS, handleCorsOptions } from "@/shared/utils/cors";
import { getChatGptImage } from "@omniroute/open-sse/services/chatgptImageCache.ts";
export async function OPTIONS() {
return handleCorsOptions();
}
/**
* Serve a cached ChatGPT-generated image by its opaque cache id.
*
* Auth: intentionally unauthenticated. The id is a 128-bit random UUID and
* the entry has a short TTL, so the URL is unguessable for the lifetime of
* the chat turn. We need it open because it's loaded by the user's BROWSER
* (via an `<img>` tag rendered from markdown); that fetch doesn't carry
* the OmniRoute API key. Rate limiting / abuse protection sit at the
* network layer, the same way they do for any other static asset.
*/
export async function GET(_request: Request, { params }: { params: Promise<{ id: string }> }) {
const { id } = await params;
const entry = getChatGptImage(id);
if (!entry) {
return new Response(JSON.stringify({ error: "Image not found or expired" }), {
status: 404,
headers: { "Content-Type": "application/json", ...CORS_HEADERS },
});
}
// entry.bytes is a Buffer (subclass of Uint8Array); pass it directly.
// Wrapping in `new Uint8Array(...)` would copy the entire payload — up to
// 8 MB per image — for no benefit.
return new Response(entry.bytes, {
status: 200,
headers: {
"Content-Type": entry.mime,
// Allow short browser caching — the id is unique-per-image, so a
// cache hit is fine and saves a round-trip if the user re-renders
// the chat. Beyond the in-memory TTL the URL 404s anyway.
"Cache-Control": "private, max-age=1800",
"Content-Length": String(entry.bytes.length),
...CORS_HEADERS,
},
});
}
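
Putting it together (values illustrative): the chat stream emits a tiny delta such as `![image](http://192.168.0.15:20128/v1/chatgpt-web/image/<id>)`; the browser then GETs that URL and receives 200 with Content-Type: image/png and the cached bytes, or, after the 30-minute TTL, a 404 with {"error":"Image not found or expired"}.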

View file

@@ -0,0 +1,174 @@
import { CORS_ORIGIN } from "@/shared/utils/cors";
import { handleImageEdit } from "@omniroute/open-sse/handlers/imageGeneration.ts";
import {
getProviderCredentials,
clearRecoveredProviderState,
extractApiKey,
isValidApiKey,
} from "@/sse/services/auth";
import { parseImageModel, getImageProvider } from "@omniroute/open-sse/config/imageRegistry.ts";
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
import * as log from "@/sse/utils/logger";
import { toJsonErrorPayload } from "@/shared/utils/upstreamError";
import { enforceApiKeyPolicy } from "@/shared/utils/apiKeyPolicy";
/**
* /v1/images/edits multipart edit endpoint matching OpenAI's images-edit API.
*
* Open WebUI's "Image Edit" toggle (images.edit.engine = "openai") posts here
* with `prompt` + `image` (file). For chatgpt-web, an "edit" only makes sense
* if the uploaded image was originally generated through OmniRoute; we then
* have its `{conversationId, parentMessageId}` cached and can continue the
* saved chatgpt.com conversation node, which is the only way to actually edit
* the image instead of generating an unrelated one from scratch.
*
* Without this route, multipart bodies trip Next.js's Server Action handler
* (which intercepts ALL POSTs with multipart/form-data content-type) and the
* client gets a confusing "Failed to find Server Action" 500.
*/
export async function OPTIONS() {
return new Response(null, {
headers: {
"Access-Control-Allow-Origin": CORS_ORIGIN,
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "*",
},
});
}
const PUBLIC_BASE_URL_HEADER_KEYS = ["host", "x-forwarded-host", "x-forwarded-proto"] as const;
function publicBaseUrlHeaders(headers: Headers): Record<string, string> {
const out: Record<string, string> = {};
for (const key of PUBLIC_BASE_URL_HEADER_KEYS) {
const value = headers.get(key);
if (value !== null) out[key] = value;
}
return out;
}
async function readMultipartImage(formData: FormData): Promise<{
prompt: string;
model: string | null;
size: string | null;
responseFormat: string | null;
imageBytes: Buffer | null;
imageMime: string | null;
}> {
const promptRaw = formData.get("prompt");
const prompt = typeof promptRaw === "string" ? promptRaw.trim() : "";
const modelRaw = formData.get("model");
const model = typeof modelRaw === "string" ? modelRaw.trim() : null;
const sizeRaw = formData.get("size");
const size = typeof sizeRaw === "string" ? sizeRaw.trim() : null;
const respRaw = formData.get("response_format");
const responseFormat = typeof respRaw === "string" ? respRaw.trim() : null;
// OpenAI's API and Open WebUI both accept either a single `image` field or
// an `image[]` array. We use the first image when multiple are sent — the
// chatgpt-web edit tool can only edit one image per conversation node.
const imageEntry = formData.get("image") ?? formData.get("image[]");
if (!imageEntry || typeof imageEntry === "string") {
return { prompt, model, size, responseFormat, imageBytes: null, imageMime: null };
}
const file = imageEntry as File;
const imageBytes = Buffer.from(await file.arrayBuffer());
const imageMime = file.type || "image/png";
return { prompt, model, size, responseFormat, imageBytes, imageMime };
}
export async function POST(request: Request) {
let formData: FormData;
try {
formData = await request.formData();
} catch (err) {
log.warn("IMAGE", `Invalid multipart body: ${err instanceof Error ? err.message : String(err)}`);
return errorResponse(HTTP_STATUS.BAD_REQUEST, "Invalid multipart body");
}
const { prompt, model, size, responseFormat, imageBytes, imageMime } =
await readMultipartImage(formData);
if (!prompt) {
return errorResponse(HTTP_STATUS.BAD_REQUEST, "Missing required field: prompt");
}
if (!imageBytes || imageBytes.length === 0) {
return errorResponse(HTTP_STATUS.BAD_REQUEST, "Missing required field: image");
}
const apiKey = extractApiKey(request);
if (!isValidApiKey(apiKey)) {
const policyError = enforceApiKeyPolicy(apiKey);
if (policyError) {
return new Response(JSON.stringify(policyError.body), {
status: policyError.status,
headers: { "Content-Type": "application/json" },
});
}
}
const fullModel = model || "cgpt-web/gpt-5.3-instant";
const parsed = parseImageModel(fullModel);
const providerConfig = getImageProvider(parsed.provider);
if (!providerConfig) {
return errorResponse(HTTP_STATUS.BAD_REQUEST, `Unknown image provider: ${parsed.provider}`);
}
if (providerConfig.format !== "chatgpt-web") {
// We only implement edit for chatgpt-web today; everything else routes
// through generations which doesn't accept image inputs. Surface a
// useful error rather than silently dropping the image.
return errorResponse(
HTTP_STATUS.BAD_REQUEST,
`Image edit is only supported for chatgpt-web models (got ${parsed.provider})`
);
}
const credentials = await getProviderCredentials(parsed.provider, apiKey);
if (!credentials) {
return errorResponse(HTTP_STATUS.UNAUTHORIZED, `No credentials for provider: ${parsed.provider}`);
}
if (credentials.allRateLimited) {
return unavailableResponse(
HTTP_STATUS.RATE_LIMITED,
`[${parsed.provider}] All accounts rate limited`,
credentials.retryAfter,
credentials.retryAfterHuman
);
}
const result = await handleImageEdit({
provider: parsed.provider,
model: parsed.model,
body: {
prompt,
size: size ?? undefined,
response_format: responseFormat ?? undefined,
n: 1,
},
imageBytes,
imageMime,
credentials,
log,
signal: request.signal,
clientHeaders: publicBaseUrlHeaders(request.headers),
});
if (result.success) {
await clearRecoveredProviderState(credentials);
return new Response(JSON.stringify((result as any).data), {
status: 200,
headers: { "Content-Type": "application/json" },
});
}
const errorPayload = toJsonErrorPayload(
(result as any).error,
"Image edit provider error"
);
return new Response(JSON.stringify(errorPayload), {
status: (result as any).status,
headers: { "Content-Type": "application/json" },
});
}
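
For reference, the client-side call this route expects, as a rough sketch (Node 18+ globals; the file path and key are placeholders):

import { readFile } from "node:fs/promises";

// Field names match readMultipartImage above.
const form = new FormData();
form.append("prompt", "make the sky stormy");
form.append("model", "cgpt-web/gpt-5.3-instant");
form.append("size", "1024x1024");
form.append("response_format", "url");
form.append(
  "image",
  new Blob([await readFile("./generated.png")], { type: "image/png" }),
  "generated.png"
);

const res = await fetch("http://localhost:20128/v1/images/edits", {
  method: "POST",
  headers: { Authorization: "Bearer <omniroute-api-key>" },
  body: form, // fetch sets the multipart boundary itself
});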

View file

@@ -101,6 +101,23 @@ function hasImageGenerationInput(body: Record<string, unknown>) {
return false;
}
// Forward only the host-shaped headers the chatgpt-web image handler needs
// to derive the browser-facing public base URL. Avoid copying the full
// request header set: it's wider than the handler needs (auth tokens,
// content-type, etc.) and `Headers.forEach` collapses repeated values, which
// would silently drop entries if a wider helper were reused for headers
// that can legitimately repeat (e.g., set-cookie).
const PUBLIC_BASE_URL_HEADER_KEYS = ["host", "x-forwarded-host", "x-forwarded-proto"] as const;
function publicBaseUrlHeaders(headers: Headers): Record<string, string> {
const out: Record<string, string> = {};
for (const key of PUBLIC_BASE_URL_HEADER_KEYS) {
const value = headers.get(key);
if (value !== null) out[key] = value;
}
return out;
}
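
For intuition, the downstream handler presumably resolves the browser-facing origin from these three headers, with OMNIROUTE_PUBLIC_BASE_URL taking precedence; a sketch with a helper name of our own:

// Sketch (not this diff's actual code): derive the public origin that
// generated image URLs should use.
function resolvePublicOrigin(h: Record<string, string>): string | null {
  if (process.env.OMNIROUTE_PUBLIC_BASE_URL) return process.env.OMNIROUTE_PUBLIC_BASE_URL;
  const host = h["x-forwarded-host"] ?? h["host"];
  if (!host) return null;
  const proto = h["x-forwarded-proto"] ?? "http";
  return `${proto}://${host}`;
}
// { "x-forwarded-proto": "https", "x-forwarded-host": "ai.example.com" }
//   resolves to "https://ai.example.com"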
export async function POST(request) {
let rawBody;
try {
@@ -228,6 +245,8 @@ export async function POST(request) {
credentials,
log,
...(isCustomModel && { resolvedProvider: provider }),
signal: request.signal,
clientHeaders: publicBaseUrlHeaders(request.headers),
});
if (result.success) {

View file

@@ -66,6 +66,7 @@ export const webRuntimeEnvSchema = z.object({
OMNIROUTE_DISABLE_BACKGROUND_SERVICES: optionalBooleanEnv,
CLOUD_URL: optionalHttpUrl,
NEXT_PUBLIC_CLOUD_URL: optionalHttpUrl,
OMNIROUTE_PUBLIC_BASE_URL: optionalHttpUrl,
OMNIROUTE_BASE_URL: optionalHttpUrl,
BASE_URL: optionalHttpUrl,
NEXT_PUBLIC_BASE_URL: optionalHttpUrl,

View file

@@ -21,6 +21,9 @@ declare namespace NodeJS {
PORT?: string;
API_HOST?: string;
DASHBOARD_PORT?: string;
OMNIROUTE_PUBLIC_BASE_URL?: string;
OMNIROUTE_CGPT_WEB_IMAGE_TIMEOUT_MS?: string;
OMNIROUTE_CGPT_WEB_IMAGE_CACHE_MAX_MB?: string;
OMNIROUTE_BASE_URL?: string;
OMNIROUTE_DISABLE_BACKGROUND_SERVICES?: string;
OMNIROUTE_PORT?: string;

File diff suppressed because it is too large