Merge branch 'main' into UX/connections-ux-tweaks

2026-05-20 00:51:36 +00:00 · 2026-05-16 01:55:19 +04:00 · 2026-05-16 01:55:19 +04:00 · 75646444d0
commit 75646444d0
parent 6869732e3c a70bf02bb8
8 changed files with 242 additions and 65 deletions
--- a/studio/backend/core/inference/external_provider.py
+++ b/studio/backend/core/inference/external_provider.py
@ -2870,7 +2870,17 @@ class ExternalProviderClient:
        response.raise_for_status()
        data = response.json()
        containers = data.get("data") if isinstance(data, dict) else None
-        return list(containers) if isinstance(containers, list) else []
+        result = list(containers) if isinstance(containers, list) else []
+        logger.info(
+            "openai_container_list.response count=%s items=%s",
+            len(result),
+            [
+                {"id": c.get("id"), "status": c.get("status")}
+                for c in result
+                if isinstance(c, dict)
+            ],
+        )
+        return result

    async def create_openai_container(
        self,
@ -2901,15 +2911,38 @@ class ExternalProviderClient:
    async def delete_openai_container(self, container_id: str) -> None:
        """DELETE /v1/containers/{id}. 404s are surfaced as HTTPError.

+        Uses a fresh httpx client (not the shared ``_http_client``) so
+        connection-pool state from earlier chat requests cannot
+        interfere — observed in the wild that DELETEs over the shared
+        pool returned ``deleted: true`` while the container persisted
+        in subsequent /containers list calls, even though the same
+        DELETE issued from a fresh client genuinely removed it.
+
        Verifies the response body reports ``deleted: true``. OpenAI
        returns a 2xx ``deleted: true`` body even when the request is
        silently rejected (e.g. missing OpenAI-Beta header), so a
        status-only check is not sufficient.
        """
-        response = await _http_client.delete(
-            f"{self.base_url}/containers/{container_id}",
-            headers = self._container_headers(),
-            timeout = self._timeout,
+        url = f"{self.base_url}/containers/{container_id}"
+        headers = self._container_headers()
+        logger.info(
+            "openai_container_delete.outbound url=%s has_auth=%s openai_beta=%s",
+            url,
+            "Authorization" in headers,
+            headers.get("OpenAI-Beta"),
+        )
+        async with httpx.AsyncClient(timeout = self._timeout) as fresh_client:
+            response = await fresh_client.delete(url, headers = headers)
+        logger.info(
+            "openai_container_delete.response status=%s cf_ray=%s "
+            "request_id=%s organization=%s project=%s processing_ms=%s body=%s",
+            response.status_code,
+            response.headers.get("cf-ray"),
+            response.headers.get("x-request-id"),
+            response.headers.get("openai-organization"),
+            response.headers.get("openai-project"),
+            response.headers.get("openai-processing-ms"),
+            response.text[:300],
        )
        response.raise_for_status()
        try:
--- a/studio/backend/models/inference.py
+++ b/studio/backend/models/inference.py
@ -650,11 +650,11 @@ class CreateOpenAIContainerBody(OpenAIContainerRequest):
    ttl_minutes: int = Field(
        20,
        ge = 1,
-        le = 10080,  # 1 week
+        le = 20,
        description = (
            "Idle-timeout TTL the new container will inherit (anchor="
-            "last_active_at). OpenAI's default is 20; we cap at one "
-            "week as a safety bound."
+            "last_active_at). OpenAI hard-caps this at 20 minutes and "
+            "rejects larger values with integer_above_max_value."
        ),
    )

--- a/studio/backend/routes/inference.py
+++ b/studio/backend/routes/inference.py
@ -1716,8 +1716,15 @@ async def list_openai_containers(
                status_code = 502,
                detail = f"Failed to reach OpenAI: {exc}",
            )
+        # OpenAI keeps expired containers in /v1/containers indefinitely
+        # with status="expired" — they're effectively dead but still
+        # listed. Hide them so the picker only shows usable containers.
        return ListOpenAIContainersResponse(
-            containers = [_summarize_container(c) for c in raw if isinstance(c, dict)],
+            containers = [
+                _summarize_container(c)
+                for c in raw
+                if isinstance(c, dict) and c.get("status") != "expired"
+            ],
        )
    finally:
        await client.close()
@ -1766,17 +1773,38 @@ async def delete_openai_container(
    current_subject: str = Depends(get_current_subject),
 ) -> None:
    """Delete a named container by id."""
+    logger.info(
+        "openai_container_delete.request subject=%s container_id=%s base_url=%s",
+        current_subject,
+        body.container_id,
+        body.provider_base_url,
+    )
    client = _resolve_openai_cloud_client(body)
    try:
        try:
            await client.delete_openai_container(body.container_id)
+            logger.info(
+                "openai_container_delete.success container_id=%s",
+                body.container_id,
+            )
        except httpx.HTTPStatusError as exc:
            detail = exc.response.text[:500] if exc.response is not None else str(exc)
+            logger.warning(
+                "openai_container_delete.openai_rejected container_id=%s status=%s body=%s",
+                body.container_id,
+                exc.response.status_code if exc.response else None,
+                detail,
+            )
            raise HTTPException(
                status_code = exc.response.status_code if exc.response else 502,
                detail = f"OpenAI rejected /containers delete: {detail}",
            )
        except httpx.HTTPError as exc:
+            logger.warning(
+                "openai_container_delete.transport_error container_id=%s error=%s",
+                body.container_id,
+                exc,
+            )
            raise HTTPException(
                status_code = 502,
                detail = f"Failed to reach OpenAI: {exc}",
--- a/studio/backend/tests/test_openai_container_crud.py
+++ b/studio/backend/tests/test_openai_container_crud.py
@ -149,3 +149,41 @@ def test_delete_propagates_openai_4xx(monkeypatch):

    with pytest.raises(httpx.HTTPStatusError):
        _drive(_make_client().delete_openai_container("cntr_missing"))
+
+
+def test_list_route_filters_expired_containers(monkeypatch):
+    """OpenAI keeps containers in /v1/containers indefinitely with
+    status="expired" after their idle TTL passes — they can't be
+    used but still show up. The list route must drop them so the
+    picker only surfaces usable containers."""
+    from routes import inference as inf_mod
+    from models.inference import OpenAIContainerRequest
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        return httpx.Response(
+            200,
+            json = {
+                "data": [
+                    {"id": "cntr_active", "name": "live", "status": "running"},
+                    {"id": "cntr_dead", "name": "old", "status": "expired"},
+                    {"id": "cntr_unknown", "name": "no-status"},
+                ],
+            },
+        )
+
+    _mock_http_client(monkeypatch, handler)
+
+    def fake_resolve(_body):
+        return _make_client()
+
+    monkeypatch.setattr(inf_mod, "_resolve_openai_cloud_client", fake_resolve)
+
+    body = OpenAIContainerRequest(
+        encrypted_api_key = "enc",
+        provider_base_url = "https://api.openai.com/v1",
+    )
+    response = _drive(inf_mod.list_openai_containers(body, current_subject = "u"))
+    ids = [c.id for c in response.containers]
+    assert "cntr_active" in ids
+    assert "cntr_unknown" in ids  # missing status is treated as usable
+    assert "cntr_dead" not in ids
--- a/studio/frontend/src/features/chat/api/openai-containers.ts
+++ b/studio/frontend/src/features/chat/api/openai-containers.ts
@ -115,7 +115,10 @@ export async function deleteOpenAIContainer(
      }),
    },
  );
-  if (!response.ok && response.status !== 204) {
+  // 404 = container already gone (deleted elsewhere, or expired-then-purged).
+  // Treat as idempotent success so a stale list entry doesn't surface as a
+  // confusing error — the caller will refresh and the entry will disappear.
+  if (!response.ok && response.status !== 204 && response.status !== 404) {
    throw new Error(await parseError(response));
  }
 }
--- a/studio/frontend/src/features/chat/components/openai-code-exec-section.tsx
+++ b/studio/frontend/src/features/chat/components/openai-code-exec-section.tsx
@ -59,12 +59,7 @@ import { ensureThreadRecord } from "../runtime-provider";
 const AUTO_OPTION_VALUE = "__auto__";
 const DEFAULT_TTL_MINUTES = 20;
 const TTL_MIN = 1;
-const TTL_MAX = 10080; // one week — matches backend bound
-// Cadence for re-fetching the container list while the section is
-// mounted. OpenAI's container TTL flips at minute granularity, so 30s
-// is fast enough that an expired container loses its ACTIVE pill within
-// half a minute without hammering /v1/containers.
-const REFRESH_POLL_MS = 30_000;
+const TTL_MAX = 20; // OpenAI hard cap on expires_after.minutes

 function ageLabel(epochSeconds: number | null | undefined): string {
  if (!epochSeconds) return "";
@ -114,12 +109,13 @@ export function OpenAICodeExecSection({
  const [createTtl, setCreateTtl] = useState<number>(
    provider.openaiContainerTtlMinutes ?? DEFAULT_TTL_MINUTES,
  );
-  // Target row for the destructive confirmation dialog. Held in state
-  // (rather than blocking with window.confirm) so the dialog sits inside
-  // the settings sheet instead of a native browser alert.
-  const [pendingDelete, setPendingDelete] =
-    useState<OpenAIContainerSummary | null>(null);
-  const [deleting, setDeleting] = useState(false);
+  // Ids that have been deleted in this session. Once tombstoned, an id
+  // stays hidden from the picker for the lifetime of the page — OpenAI's
+  // /containers list can keep returning a freshly-deleted id for an
+  // undocumented and variable amount of time, and an automatic re-show
+  // creates more confusion than it solves. Refreshing the page resets
+  // the tombstone naturally.
+  const [tombstones, setTombstones] = useState<Set<string>>(() => new Set());

  const thread = useLiveQuery(
    async () => (activeThreadId ? db.threads.get(activeThreadId) : undefined),
@ -127,14 +123,22 @@ export function OpenAICodeExecSection({
  );
  const activeContainerId = thread?.openaiCodeExecContainerId ?? null;

+  // Hide just-deleted containers even if OpenAI's list still returns them.
+  // This is the single chokepoint — every downstream view (sorted picker,
+  // auto-bind candidate, all-containers list) derives from visibleContainers.
+  const visibleContainers = useMemo(() => {
+    if (tombstones.size === 0) return containers;
+    return containers.filter((c) => !tombstones.has(c.id));
+  }, [containers, tombstones]);
+
  // Containers sorted newest-first by lastActiveAt so the dropdown's
  // default (auto-bind target) shows up first.
  const sortedContainers = useMemo(
    () =>
-      [...containers].sort(
+      [...visibleContainers].sort(
        (a, b) => (b.lastActiveAt ?? 0) - (a.lastActiveAt ?? 0),
      ),
-    [containers],
+    [visibleContainers],
  );

  // First running container by lastActiveAt — the auto-bind target and
@ -217,10 +221,18 @@ export function OpenAICodeExecSection({
  // the chat-adapter's lazy-create path will mint the first container
  // on first send.
  useEffect(() => {
-    if (!activeThreadId || activeContainerId || !firstRunningContainer) {
+    if (
+      !activeThreadId ||
+      activeContainerId ||
+      visibleContainers.length === 0
+    ) {
      return;
    }
-    const candidateId = firstRunningContainer.id;
+    const sorted = [...visibleContainers].sort(
+      (a, b) => (b.lastActiveAt ?? 0) - (a.lastActiveAt ?? 0),
+    );
+    const candidate = sorted[0];
+    if (!candidate) return;
    void (async () => {
      try {
        await ensureThreadRecord({
@ -234,25 +246,7 @@ export function OpenAICodeExecSection({
        // Best-effort; the chat-adapter will inherit/create on send.
      }
    })();
-  }, [activeThreadId, activeContainerId, firstRunningContainer]);
-
-  // Stale-bind sweeper: when the active thread is pinned to a container
-  // that the latest list no longer reports as running (expired by idle
-  // TTL, or deleted out-of-band), drop the binding so the auto-bind
-  // effect above can pick a healthy candidate and the chat adapter's
-  // lazy-create path can mint a fresh container on the next send. Only
-  // fires when `containers` has actually been fetched at least once to
-  // avoid clearing on a transient empty list during initial load.
-  useEffect(() => {
-    if (!activeThreadId || !activeContainerId) return;
-    if (containers.length === 0) return;
-    if (boundContainer && isContainerRunning(boundContainer)) return;
-    void db.threads
-      .update(activeThreadId, { openaiCodeExecContainerId: null })
-      .catch(() => {
-        /* best-effort cleanup */
-      });
-  }, [activeThreadId, activeContainerId, boundContainer, containers.length]);
+  }, [activeThreadId, activeContainerId, visibleContainers]);

  const ttlValue = provider.openaiContainerTtlMinutes ?? DEFAULT_TTL_MINUTES;

@ -304,7 +298,6 @@ export function OpenAICodeExecSection({
      toast.success(`Created container ${name}`);
      setCreateName("");
      setCreateOpen(false);
-      await refresh();
      // Auto-bind the just-created container to the active thread.
      // ensureThreadRecord first so the bind lands even when the user
      // creates a container before sending the first message — without
@ -331,6 +324,10 @@ export function OpenAICodeExecSection({
      );
    } finally {
      setCreating(false);
+      // Refresh even on failure: the request may have partially succeeded
+      // server-side (created container, lost response), and a re-fetch
+      // keeps the picker in sync with OpenAI's actual state.
+      await refresh();
    }
  };

@ -343,6 +340,14 @@ export function OpenAICodeExecSection({
        { apiKey, baseUrl: provider.baseUrl || null },
        id,
      );
+      // Tombstone the id so the picker hides it immediately even if
+      // OpenAI's list keeps returning it for a while.
+      setTombstones((prev) => {
+        if (prev.has(id)) return prev;
+        const next = new Set(prev);
+        next.add(id);
+        return next;
+      });
      // Clear any thread bindings pointing at the now-deleted id.
      const affected = await db.threads
        .filter((t) => t.openaiCodeExecContainerId === id)
@ -353,14 +358,15 @@ export function OpenAICodeExecSection({
        ),
      );
      toast.success(`Deleted container ${name || id}`);
-      setPendingDelete(null);
-      await refresh();
    } catch (err) {
      toast.error(
        `Delete failed: ${err instanceof Error ? err.message : "Unknown"}`,
      );
    } finally {
-      setDeleting(false);
+      // Always refresh so a stale list entry (e.g. container deleted
+      // elsewhere, or already expired) is purged from the UI even when
+      // the delete call itself errored.
+      await refresh();
    }
  };

@ -374,7 +380,7 @@ export function OpenAICodeExecSection({
          htmlFor="openai-container-ttl"
          className="min-w-0 text-[13px] font-medium leading-[1.25] tracking-nav text-nav-fg"
        >
-          Idle timeout (min)
+          New-container idle timeout (min, max 20)
        </label>
        <Input
          id="openai-container-ttl"
@ -409,19 +415,44 @@ export function OpenAICodeExecSection({
            />
          </Button>
        </div>
-        {isLoading && sortedContainers.length === 0 ? (
+        {/* When no containers exist yet, render a disabled placeholder
+            instead of the picker. The first one is created by the
+            chat-adapter on first send (lazy-create) and will appear
+            here after the next refresh. */}
+        {sortedContainers.length === 0 ? (
+          <div className="h-9 w-full rounded-md border border-primary/40 bg-background px-2 flex items-center text-sm text-muted-foreground">
+            (none yet — will be created on first send)
+          </div>
+        ) : (
+          <select
+            value={displayedContainerId ?? sortedContainers[0].id}
+            onChange={(e) => onPick(e.target.value)}
+            disabled={!activeThreadId}
+            className="h-9 w-full rounded-md border border-primary/40 bg-background px-2 text-sm font-medium shadow-sm focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-primary/40"
+          >
+            {sortedContainers.map((c) => (
+              <option key={c.id} value={c.id}>
+                {c.name ?? "(unnamed)"} · {c.id.slice(0, 14)}…
+                {c.lastActiveAt ? ` · active ${ageLabel(c.lastActiveAt)}` : ""}
+              </option>
+            ))}
+          </select>
+        )}
+      </div>
+
+      {/* Container list with delete actions — labeled and visually
+          quieter so it's clearly the "all containers, manage them"
+          area rather than the active selector above. */}
+      <div className="flex flex-col gap-1.5">
+        <span className="text-[11px] uppercase tracking-wider text-muted-foreground">
+          All containers
+        </span>
+        {isLoading && visibleContainers.length === 0 ? (
          <Skeleton className="h-16 w-full" />
-        ) : sortedContainers.length > 0 ? (
-          <ul className="flex max-h-52 flex-col gap-1 overflow-auto">
-            {sortedContainers.map((c) => {
-              const running = isContainerRunning(c);
-              const isActive = running && c.id === displayActiveId;
-              const ttlMinutes = c.expiresAfterMinutes ?? DEFAULT_TTL_MINUTES;
-              const canActivate =
-                activeThreadId != null && !isActive && running;
-              const statusLabel = !running
-                ? (c.status ?? "expired")
-                : null;
+        ) : visibleContainers.length > 0 ? (
+          <ul className="flex flex-col gap-1 max-h-44 overflow-auto">
+            {visibleContainers.map((c) => {
+              const isActive = c.id === activeContainerId;
              return (
                <li
                  key={c.id}
--- a/studio/frontend/src/features/chat/external-providers.ts
+++ b/studio/frontend/src/features/chat/external-providers.ts
@ -237,7 +237,7 @@ function normalizeProvider(raw: ExternalProviderConfig): ExternalProviderConfig
      providerType === "openai" &&
      typeof raw.openaiContainerTtlMinutes === "number" &&
      raw.openaiContainerTtlMinutes >= 1
-        ? Math.min(raw.openaiContainerTtlMinutes, 10080)
+        ? Math.min(raw.openaiContainerTtlMinutes, 20)
        : undefined,
  };
 }
--- a/studio/install_llama_prebuilt.py
+++ b/studio/install_llama_prebuilt.py
@ -3433,10 +3433,52 @@ def extract_archive(archive_path: Path, destination: Path) -> None:
            ) from exc
        return target

+    def _try_repair_missing_slash(
+        member_name: str, link_name: str, archive_names: set[str]
+    ) -> str | None:
+        """Some upstream llama.cpp Mac releases (e.g. b9165, b9169) ship
+        symlinks whose linkname is missing the directory separator AND
+        the leading character of the file basename between the
+        top-level dir and the rest of the path:
+
+            llama-b9165/libggml-rpc.0.dylib -> llama-b9165ibggml-rpc.0.11.1.dylib
+
+        That cannot be resolved as written. Detect the pattern
+        (linkname starts with the top-level dir name but no following
+        slash) and search archive entries under that dir for a real
+        file whose basename ends with the mangled suffix. Only accept
+        when the suffix uniquely identifies a real archive entry."""
+        if "/" not in member_name or "/" in link_name:
+            return None
+        top, _, _ = member_name.partition("/")
+        if not link_name.startswith(top) or len(link_name) <= len(top):
+            return None
+        bad_suffix = link_name[len(top) :]
+        if not bad_suffix or bad_suffix.startswith("/"):
+            return None
+        prefix = f"{top}/"
+        candidates = [
+            name
+            for name in archive_names
+            if name.startswith(prefix)
+            and "/" not in name[len(prefix) :]
+            and name[len(prefix) :].endswith(bad_suffix)
+        ]
+        if len(candidates) != 1:
+            return None
+        return candidates[0]
+
    def safe_link_target(
-        base: Path, member_name: str, link_name: str, target: Path
+        base: Path,
+        member_name: str,
+        link_name: str,
+        target: Path,
+        archive_names: set[str],
    ) -> tuple[str, Path]:
        normalized = link_name.replace("\\", "/")
+        repaired = _try_repair_missing_slash(member_name, normalized, archive_names)
+        if repaired is not None:
+            normalized = repaired
        link_path = Path(normalized)
        if link_path.is_absolute():
            raise PrebuiltFallback(
@ -3473,8 +3515,10 @@ def extract_archive(archive_path: Path, destination: Path) -> None:

    def extract_tar_safely(source: Path, base: Path) -> None:
        pending_links: list[tuple[tarfile.TarInfo, Path]] = []
+        archive_names: set[str] = set()
        with tarfile.open(source, "r:gz") as archive:
            for member in archive.getmembers():
+                archive_names.add(member.name)
                target = safe_extract_path(base, member.name)
                if member.isdir():
                    target.mkdir(parents = True, exist_ok = True)
@ -3501,7 +3545,7 @@ def extract_archive(archive_path: Path, destination: Path) -> None:
            progressed = False
            for member, target in unresolved:
                normalized_link, resolved_target = safe_link_target(
-                    base, member.name, member.linkname, target
+                    base, member.name, member.linkname, target, archive_names
                )
                if not resolved_target.exists() and not resolved_target.is_symlink():
                    next_round.append((member, target))