mirror of
https://github.com/unslothai/unsloth.git
synced 2026-05-20 00:51:36 +00:00
Merge branch 'main' into UX/connections-ux-tweaks
This commit is contained in:
commit
75646444d0
8 changed files with 242 additions and 65 deletions
|
|
@ -2870,7 +2870,17 @@ class ExternalProviderClient:
|
|||
response.raise_for_status()
|
||||
data = response.json()
|
||||
containers = data.get("data") if isinstance(data, dict) else None
|
||||
return list(containers) if isinstance(containers, list) else []
|
||||
result = list(containers) if isinstance(containers, list) else []
|
||||
logger.info(
|
||||
"openai_container_list.response count=%s items=%s",
|
||||
len(result),
|
||||
[
|
||||
{"id": c.get("id"), "status": c.get("status")}
|
||||
for c in result
|
||||
if isinstance(c, dict)
|
||||
],
|
||||
)
|
||||
return result
|
||||
|
||||
async def create_openai_container(
|
||||
self,
|
||||
|
|
@ -2901,15 +2911,38 @@ class ExternalProviderClient:
|
|||
async def delete_openai_container(self, container_id: str) -> None:
|
||||
"""DELETE /v1/containers/{id}. 404s are surfaced as HTTPError.
|
||||
|
||||
Uses a fresh httpx client (not the shared ``_http_client``) so
|
||||
connection-pool state from earlier chat requests cannot
|
||||
interfere — observed in the wild that DELETEs over the shared
|
||||
pool returned ``deleted: true`` while the container persisted
|
||||
in subsequent /containers list calls, even though the same
|
||||
DELETE issued from a fresh client genuinely removed it.
|
||||
|
||||
Verifies the response body reports ``deleted: true``. OpenAI
|
||||
returns a 2xx ``deleted: true`` body even when the request is
|
||||
silently rejected (e.g. missing OpenAI-Beta header), so a
|
||||
status-only check is not sufficient.
|
||||
"""
|
||||
response = await _http_client.delete(
|
||||
f"{self.base_url}/containers/{container_id}",
|
||||
headers = self._container_headers(),
|
||||
timeout = self._timeout,
|
||||
url = f"{self.base_url}/containers/{container_id}"
|
||||
headers = self._container_headers()
|
||||
logger.info(
|
||||
"openai_container_delete.outbound url=%s has_auth=%s openai_beta=%s",
|
||||
url,
|
||||
"Authorization" in headers,
|
||||
headers.get("OpenAI-Beta"),
|
||||
)
|
||||
async with httpx.AsyncClient(timeout = self._timeout) as fresh_client:
|
||||
response = await fresh_client.delete(url, headers = headers)
|
||||
logger.info(
|
||||
"openai_container_delete.response status=%s cf_ray=%s "
|
||||
"request_id=%s organization=%s project=%s processing_ms=%s body=%s",
|
||||
response.status_code,
|
||||
response.headers.get("cf-ray"),
|
||||
response.headers.get("x-request-id"),
|
||||
response.headers.get("openai-organization"),
|
||||
response.headers.get("openai-project"),
|
||||
response.headers.get("openai-processing-ms"),
|
||||
response.text[:300],
|
||||
)
|
||||
response.raise_for_status()
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -650,11 +650,11 @@ class CreateOpenAIContainerBody(OpenAIContainerRequest):
|
|||
ttl_minutes: int = Field(
|
||||
20,
|
||||
ge = 1,
|
||||
le = 10080, # 1 week
|
||||
le = 20,
|
||||
description = (
|
||||
"Idle-timeout TTL the new container will inherit (anchor="
|
||||
"last_active_at). OpenAI's default is 20; we cap at one "
|
||||
"week as a safety bound."
|
||||
"last_active_at). OpenAI hard-caps this at 20 minutes and "
|
||||
"rejects larger values with integer_above_max_value."
|
||||
),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1716,8 +1716,15 @@ async def list_openai_containers(
|
|||
status_code = 502,
|
||||
detail = f"Failed to reach OpenAI: {exc}",
|
||||
)
|
||||
# OpenAI keeps expired containers in /v1/containers indefinitely
|
||||
# with status="expired" — they're effectively dead but still
|
||||
# listed. Hide them so the picker only shows usable containers.
|
||||
return ListOpenAIContainersResponse(
|
||||
containers = [_summarize_container(c) for c in raw if isinstance(c, dict)],
|
||||
containers = [
|
||||
_summarize_container(c)
|
||||
for c in raw
|
||||
if isinstance(c, dict) and c.get("status") != "expired"
|
||||
],
|
||||
)
|
||||
finally:
|
||||
await client.close()
|
||||
|
|
@ -1766,17 +1773,38 @@ async def delete_openai_container(
|
|||
current_subject: str = Depends(get_current_subject),
|
||||
) -> None:
|
||||
"""Delete a named container by id."""
|
||||
logger.info(
|
||||
"openai_container_delete.request subject=%s container_id=%s base_url=%s",
|
||||
current_subject,
|
||||
body.container_id,
|
||||
body.provider_base_url,
|
||||
)
|
||||
client = _resolve_openai_cloud_client(body)
|
||||
try:
|
||||
try:
|
||||
await client.delete_openai_container(body.container_id)
|
||||
logger.info(
|
||||
"openai_container_delete.success container_id=%s",
|
||||
body.container_id,
|
||||
)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
detail = exc.response.text[:500] if exc.response is not None else str(exc)
|
||||
logger.warning(
|
||||
"openai_container_delete.openai_rejected container_id=%s status=%s body=%s",
|
||||
body.container_id,
|
||||
exc.response.status_code if exc.response else None,
|
||||
detail,
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code = exc.response.status_code if exc.response else 502,
|
||||
detail = f"OpenAI rejected /containers delete: {detail}",
|
||||
)
|
||||
except httpx.HTTPError as exc:
|
||||
logger.warning(
|
||||
"openai_container_delete.transport_error container_id=%s error=%s",
|
||||
body.container_id,
|
||||
exc,
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code = 502,
|
||||
detail = f"Failed to reach OpenAI: {exc}",
|
||||
|
|
|
|||
|
|
@ -149,3 +149,41 @@ def test_delete_propagates_openai_4xx(monkeypatch):
|
|||
|
||||
with pytest.raises(httpx.HTTPStatusError):
|
||||
_drive(_make_client().delete_openai_container("cntr_missing"))
|
||||
|
||||
|
||||
def test_list_route_filters_expired_containers(monkeypatch):
|
||||
"""OpenAI keeps containers in /v1/containers indefinitely with
|
||||
status="expired" after their idle TTL passes — they can't be
|
||||
used but still show up. The list route must drop them so the
|
||||
picker only surfaces usable containers."""
|
||||
from routes import inference as inf_mod
|
||||
from models.inference import OpenAIContainerRequest
|
||||
|
||||
def handler(request: httpx.Request) -> httpx.Response:
|
||||
return httpx.Response(
|
||||
200,
|
||||
json = {
|
||||
"data": [
|
||||
{"id": "cntr_active", "name": "live", "status": "running"},
|
||||
{"id": "cntr_dead", "name": "old", "status": "expired"},
|
||||
{"id": "cntr_unknown", "name": "no-status"},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
_mock_http_client(monkeypatch, handler)
|
||||
|
||||
def fake_resolve(_body):
|
||||
return _make_client()
|
||||
|
||||
monkeypatch.setattr(inf_mod, "_resolve_openai_cloud_client", fake_resolve)
|
||||
|
||||
body = OpenAIContainerRequest(
|
||||
encrypted_api_key = "enc",
|
||||
provider_base_url = "https://api.openai.com/v1",
|
||||
)
|
||||
response = _drive(inf_mod.list_openai_containers(body, current_subject = "u"))
|
||||
ids = [c.id for c in response.containers]
|
||||
assert "cntr_active" in ids
|
||||
assert "cntr_unknown" in ids # missing status is treated as usable
|
||||
assert "cntr_dead" not in ids
|
||||
|
|
|
|||
|
|
@ -115,7 +115,10 @@ export async function deleteOpenAIContainer(
|
|||
}),
|
||||
},
|
||||
);
|
||||
if (!response.ok && response.status !== 204) {
|
||||
// 404 = container already gone (deleted elsewhere, or expired-then-purged).
|
||||
// Treat as idempotent success so a stale list entry doesn't surface as a
|
||||
// confusing error — the caller will refresh and the entry will disappear.
|
||||
if (!response.ok && response.status !== 204 && response.status !== 404) {
|
||||
throw new Error(await parseError(response));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,12 +59,7 @@ import { ensureThreadRecord } from "../runtime-provider";
|
|||
const AUTO_OPTION_VALUE = "__auto__";
|
||||
const DEFAULT_TTL_MINUTES = 20;
|
||||
const TTL_MIN = 1;
|
||||
const TTL_MAX = 10080; // one week — matches backend bound
|
||||
// Cadence for re-fetching the container list while the section is
|
||||
// mounted. OpenAI's container TTL flips at minute granularity, so 30s
|
||||
// is fast enough that an expired container loses its ACTIVE pill within
|
||||
// half a minute without hammering /v1/containers.
|
||||
const REFRESH_POLL_MS = 30_000;
|
||||
const TTL_MAX = 20; // OpenAI hard cap on expires_after.minutes
|
||||
|
||||
function ageLabel(epochSeconds: number | null | undefined): string {
|
||||
if (!epochSeconds) return "";
|
||||
|
|
@ -114,12 +109,13 @@ export function OpenAICodeExecSection({
|
|||
const [createTtl, setCreateTtl] = useState<number>(
|
||||
provider.openaiContainerTtlMinutes ?? DEFAULT_TTL_MINUTES,
|
||||
);
|
||||
// Target row for the destructive confirmation dialog. Held in state
|
||||
// (rather than blocking with window.confirm) so the dialog sits inside
|
||||
// the settings sheet instead of a native browser alert.
|
||||
const [pendingDelete, setPendingDelete] =
|
||||
useState<OpenAIContainerSummary | null>(null);
|
||||
const [deleting, setDeleting] = useState(false);
|
||||
// Ids that have been deleted in this session. Once tombstoned, an id
|
||||
// stays hidden from the picker for the lifetime of the page — OpenAI's
|
||||
// /containers list can keep returning a freshly-deleted id for an
|
||||
// undocumented and variable amount of time, and an automatic re-show
|
||||
// creates more confusion than it solves. Refreshing the page resets
|
||||
// the tombstone naturally.
|
||||
const [tombstones, setTombstones] = useState<Set<string>>(() => new Set());
|
||||
|
||||
const thread = useLiveQuery(
|
||||
async () => (activeThreadId ? db.threads.get(activeThreadId) : undefined),
|
||||
|
|
@ -127,14 +123,22 @@ export function OpenAICodeExecSection({
|
|||
);
|
||||
const activeContainerId = thread?.openaiCodeExecContainerId ?? null;
|
||||
|
||||
// Hide just-deleted containers even if OpenAI's list still returns them.
|
||||
// This is the single chokepoint — every downstream view (sorted picker,
|
||||
// auto-bind candidate, all-containers list) derives from visibleContainers.
|
||||
const visibleContainers = useMemo(() => {
|
||||
if (tombstones.size === 0) return containers;
|
||||
return containers.filter((c) => !tombstones.has(c.id));
|
||||
}, [containers, tombstones]);
|
||||
|
||||
// Containers sorted newest-first by lastActiveAt so the dropdown's
|
||||
// default (auto-bind target) shows up first.
|
||||
const sortedContainers = useMemo(
|
||||
() =>
|
||||
[...containers].sort(
|
||||
[...visibleContainers].sort(
|
||||
(a, b) => (b.lastActiveAt ?? 0) - (a.lastActiveAt ?? 0),
|
||||
),
|
||||
[containers],
|
||||
[visibleContainers],
|
||||
);
|
||||
|
||||
// First running container by lastActiveAt — the auto-bind target and
|
||||
|
|
@ -217,10 +221,18 @@ export function OpenAICodeExecSection({
|
|||
// the chat-adapter's lazy-create path will mint the first container
|
||||
// on first send.
|
||||
useEffect(() => {
|
||||
if (!activeThreadId || activeContainerId || !firstRunningContainer) {
|
||||
if (
|
||||
!activeThreadId ||
|
||||
activeContainerId ||
|
||||
visibleContainers.length === 0
|
||||
) {
|
||||
return;
|
||||
}
|
||||
const candidateId = firstRunningContainer.id;
|
||||
const sorted = [...visibleContainers].sort(
|
||||
(a, b) => (b.lastActiveAt ?? 0) - (a.lastActiveAt ?? 0),
|
||||
);
|
||||
const candidate = sorted[0];
|
||||
if (!candidate) return;
|
||||
void (async () => {
|
||||
try {
|
||||
await ensureThreadRecord({
|
||||
|
|
@ -234,25 +246,7 @@ export function OpenAICodeExecSection({
|
|||
// Best-effort; the chat-adapter will inherit/create on send.
|
||||
}
|
||||
})();
|
||||
}, [activeThreadId, activeContainerId, firstRunningContainer]);
|
||||
|
||||
// Stale-bind sweeper: when the active thread is pinned to a container
|
||||
// that the latest list no longer reports as running (expired by idle
|
||||
// TTL, or deleted out-of-band), drop the binding so the auto-bind
|
||||
// effect above can pick a healthy candidate and the chat adapter's
|
||||
// lazy-create path can mint a fresh container on the next send. Only
|
||||
// fires when `containers` has actually been fetched at least once to
|
||||
// avoid clearing on a transient empty list during initial load.
|
||||
useEffect(() => {
|
||||
if (!activeThreadId || !activeContainerId) return;
|
||||
if (containers.length === 0) return;
|
||||
if (boundContainer && isContainerRunning(boundContainer)) return;
|
||||
void db.threads
|
||||
.update(activeThreadId, { openaiCodeExecContainerId: null })
|
||||
.catch(() => {
|
||||
/* best-effort cleanup */
|
||||
});
|
||||
}, [activeThreadId, activeContainerId, boundContainer, containers.length]);
|
||||
}, [activeThreadId, activeContainerId, visibleContainers]);
|
||||
|
||||
const ttlValue = provider.openaiContainerTtlMinutes ?? DEFAULT_TTL_MINUTES;
|
||||
|
||||
|
|
@ -304,7 +298,6 @@ export function OpenAICodeExecSection({
|
|||
toast.success(`Created container ${name}`);
|
||||
setCreateName("");
|
||||
setCreateOpen(false);
|
||||
await refresh();
|
||||
// Auto-bind the just-created container to the active thread.
|
||||
// ensureThreadRecord first so the bind lands even when the user
|
||||
// creates a container before sending the first message — without
|
||||
|
|
@ -331,6 +324,10 @@ export function OpenAICodeExecSection({
|
|||
);
|
||||
} finally {
|
||||
setCreating(false);
|
||||
// Refresh even on failure: the request may have partially succeeded
|
||||
// server-side (created container, lost response), and a re-fetch
|
||||
// keeps the picker in sync with OpenAI's actual state.
|
||||
await refresh();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -343,6 +340,14 @@ export function OpenAICodeExecSection({
|
|||
{ apiKey, baseUrl: provider.baseUrl || null },
|
||||
id,
|
||||
);
|
||||
// Tombstone the id so the picker hides it immediately even if
|
||||
// OpenAI's list keeps returning it for a while.
|
||||
setTombstones((prev) => {
|
||||
if (prev.has(id)) return prev;
|
||||
const next = new Set(prev);
|
||||
next.add(id);
|
||||
return next;
|
||||
});
|
||||
// Clear any thread bindings pointing at the now-deleted id.
|
||||
const affected = await db.threads
|
||||
.filter((t) => t.openaiCodeExecContainerId === id)
|
||||
|
|
@ -353,14 +358,15 @@ export function OpenAICodeExecSection({
|
|||
),
|
||||
);
|
||||
toast.success(`Deleted container ${name || id}`);
|
||||
setPendingDelete(null);
|
||||
await refresh();
|
||||
} catch (err) {
|
||||
toast.error(
|
||||
`Delete failed: ${err instanceof Error ? err.message : "Unknown"}`,
|
||||
);
|
||||
} finally {
|
||||
setDeleting(false);
|
||||
// Always refresh so a stale list entry (e.g. container deleted
|
||||
// elsewhere, or already expired) is purged from the UI even when
|
||||
// the delete call itself errored.
|
||||
await refresh();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -374,7 +380,7 @@ export function OpenAICodeExecSection({
|
|||
htmlFor="openai-container-ttl"
|
||||
className="min-w-0 text-[13px] font-medium leading-[1.25] tracking-nav text-nav-fg"
|
||||
>
|
||||
Idle timeout (min)
|
||||
New-container idle timeout (min, max 20)
|
||||
</label>
|
||||
<Input
|
||||
id="openai-container-ttl"
|
||||
|
|
@ -409,19 +415,44 @@ export function OpenAICodeExecSection({
|
|||
/>
|
||||
</Button>
|
||||
</div>
|
||||
{isLoading && sortedContainers.length === 0 ? (
|
||||
{/* When no containers exist yet, render a disabled placeholder
|
||||
instead of the picker. The first one is created by the
|
||||
chat-adapter on first send (lazy-create) and will appear
|
||||
here after the next refresh. */}
|
||||
{sortedContainers.length === 0 ? (
|
||||
<div className="h-9 w-full rounded-md border border-primary/40 bg-background px-2 flex items-center text-sm text-muted-foreground">
|
||||
(none yet — will be created on first send)
|
||||
</div>
|
||||
) : (
|
||||
<select
|
||||
value={displayedContainerId ?? sortedContainers[0].id}
|
||||
onChange={(e) => onPick(e.target.value)}
|
||||
disabled={!activeThreadId}
|
||||
className="h-9 w-full rounded-md border border-primary/40 bg-background px-2 text-sm font-medium shadow-sm focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-primary/40"
|
||||
>
|
||||
{sortedContainers.map((c) => (
|
||||
<option key={c.id} value={c.id}>
|
||||
{c.name ?? "(unnamed)"} · {c.id.slice(0, 14)}…
|
||||
{c.lastActiveAt ? ` · active ${ageLabel(c.lastActiveAt)}` : ""}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Container list with delete actions — labeled and visually
|
||||
quieter so it's clearly the "all containers, manage them"
|
||||
area rather than the active selector above. */}
|
||||
<div className="flex flex-col gap-1.5">
|
||||
<span className="text-[11px] uppercase tracking-wider text-muted-foreground">
|
||||
All containers
|
||||
</span>
|
||||
{isLoading && visibleContainers.length === 0 ? (
|
||||
<Skeleton className="h-16 w-full" />
|
||||
) : sortedContainers.length > 0 ? (
|
||||
<ul className="flex max-h-52 flex-col gap-1 overflow-auto">
|
||||
{sortedContainers.map((c) => {
|
||||
const running = isContainerRunning(c);
|
||||
const isActive = running && c.id === displayActiveId;
|
||||
const ttlMinutes = c.expiresAfterMinutes ?? DEFAULT_TTL_MINUTES;
|
||||
const canActivate =
|
||||
activeThreadId != null && !isActive && running;
|
||||
const statusLabel = !running
|
||||
? (c.status ?? "expired")
|
||||
: null;
|
||||
) : visibleContainers.length > 0 ? (
|
||||
<ul className="flex flex-col gap-1 max-h-44 overflow-auto">
|
||||
{visibleContainers.map((c) => {
|
||||
const isActive = c.id === activeContainerId;
|
||||
return (
|
||||
<li
|
||||
key={c.id}
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ function normalizeProvider(raw: ExternalProviderConfig): ExternalProviderConfig
|
|||
providerType === "openai" &&
|
||||
typeof raw.openaiContainerTtlMinutes === "number" &&
|
||||
raw.openaiContainerTtlMinutes >= 1
|
||||
? Math.min(raw.openaiContainerTtlMinutes, 10080)
|
||||
? Math.min(raw.openaiContainerTtlMinutes, 20)
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3433,10 +3433,52 @@ def extract_archive(archive_path: Path, destination: Path) -> None:
|
|||
) from exc
|
||||
return target
|
||||
|
||||
def _try_repair_missing_slash(
|
||||
member_name: str, link_name: str, archive_names: set[str]
|
||||
) -> str | None:
|
||||
"""Some upstream llama.cpp Mac releases (e.g. b9165, b9169) ship
|
||||
symlinks whose linkname is missing the directory separator AND
|
||||
the leading character of the file basename between the
|
||||
top-level dir and the rest of the path:
|
||||
|
||||
llama-b9165/libggml-rpc.0.dylib -> llama-b9165ibggml-rpc.0.11.1.dylib
|
||||
|
||||
That cannot be resolved as written. Detect the pattern
|
||||
(linkname starts with the top-level dir name but no following
|
||||
slash) and search archive entries under that dir for a real
|
||||
file whose basename ends with the mangled suffix. Only accept
|
||||
when the suffix uniquely identifies a real archive entry."""
|
||||
if "/" not in member_name or "/" in link_name:
|
||||
return None
|
||||
top, _, _ = member_name.partition("/")
|
||||
if not link_name.startswith(top) or len(link_name) <= len(top):
|
||||
return None
|
||||
bad_suffix = link_name[len(top) :]
|
||||
if not bad_suffix or bad_suffix.startswith("/"):
|
||||
return None
|
||||
prefix = f"{top}/"
|
||||
candidates = [
|
||||
name
|
||||
for name in archive_names
|
||||
if name.startswith(prefix)
|
||||
and "/" not in name[len(prefix) :]
|
||||
and name[len(prefix) :].endswith(bad_suffix)
|
||||
]
|
||||
if len(candidates) != 1:
|
||||
return None
|
||||
return candidates[0]
|
||||
|
||||
def safe_link_target(
|
||||
base: Path, member_name: str, link_name: str, target: Path
|
||||
base: Path,
|
||||
member_name: str,
|
||||
link_name: str,
|
||||
target: Path,
|
||||
archive_names: set[str],
|
||||
) -> tuple[str, Path]:
|
||||
normalized = link_name.replace("\\", "/")
|
||||
repaired = _try_repair_missing_slash(member_name, normalized, archive_names)
|
||||
if repaired is not None:
|
||||
normalized = repaired
|
||||
link_path = Path(normalized)
|
||||
if link_path.is_absolute():
|
||||
raise PrebuiltFallback(
|
||||
|
|
@ -3473,8 +3515,10 @@ def extract_archive(archive_path: Path, destination: Path) -> None:
|
|||
|
||||
def extract_tar_safely(source: Path, base: Path) -> None:
|
||||
pending_links: list[tuple[tarfile.TarInfo, Path]] = []
|
||||
archive_names: set[str] = set()
|
||||
with tarfile.open(source, "r:gz") as archive:
|
||||
for member in archive.getmembers():
|
||||
archive_names.add(member.name)
|
||||
target = safe_extract_path(base, member.name)
|
||||
if member.isdir():
|
||||
target.mkdir(parents = True, exist_ok = True)
|
||||
|
|
@ -3501,7 +3545,7 @@ def extract_archive(archive_path: Path, destination: Path) -> None:
|
|||
progressed = False
|
||||
for member, target in unresolved:
|
||||
normalized_link, resolved_target = safe_link_target(
|
||||
base, member.name, member.linkname, target
|
||||
base, member.name, member.linkname, target, archive_names
|
||||
)
|
||||
if not resolved_target.exists() and not resolved_target.is_symlink():
|
||||
next_round.append((member, target))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue