diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml
index 3632125ca..1270a57ef 100644
--- a/.github/workflows/studio-frontend-ci.yml
+++ b/.github/workflows/studio-frontend-ci.yml
@@ -15,6 +15,8 @@ on:
pull_request:
paths:
- 'studio/frontend/**'
+ - 'scripts/check_frontend_dep_removal.py'
+ - 'tests/studio/test_frontend_dep_removal.py'
- '.github/workflows/studio-frontend-ci.yml'
push:
branches: [main, pip]
@@ -84,6 +86,26 @@ jobs:
exit 1
fi
+ # Catch the common foot-gun: a dep dropped from package.json that is
+ # still imported somewhere. The script walks the lockfile dep graph
+ # from the new top-level deps and only counts top-level node_modules
+ # paths as valid resolution targets for bare src/ imports.
+ #
+ # actions/checkout uses fetch-depth: 1 by default, so the base branch
+ # is not available locally. Fetch the single base commit with an
+ # explicit refspec so origin/<base_ref> is reliably created (a bare
+ # `git fetch origin <ref>` only updates FETCH_HEAD in some configs).
+ - name: Dependency removal safety check
+ if: github.event_name == 'pull_request'
+ working-directory: ${{ github.workspace }}
+ run: |
+ git fetch --no-tags --depth=1 origin \
+ "${{ github.base_ref }}:refs/remotes/origin/${{ github.base_ref }}"
+ python3 scripts/check_frontend_dep_removal.py \
+ --base "origin/${{ github.base_ref }}" \
+ --enumerate-dead
+ python3 tests/studio/test_frontend_dep_removal.py
+
- name: Typecheck
run: npm run typecheck
diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py
new file mode 100644
index 000000000..260ad5215
--- /dev/null
+++ b/scripts/check_frontend_dep_removal.py
@@ -0,0 +1,1195 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+"""Guard against breaking npm dependency removals in studio/frontend.
+
+Diffs the current package.json against a git base, finds every package
+that was removed, and confirms each is no longer referenced anywhere
+in the repo. If a removed package is still imported and is not
+transitively resolvable through the new lockfile, exits non-zero with
+file:line citations.
+
+Usage:
+ python scripts/check_frontend_dep_removal.py
+ python scripts/check_frontend_dep_removal.py --base origin/main
+ python scripts/check_frontend_dep_removal.py --base HEAD~1
+ python scripts/check_frontend_dep_removal.py --base-pkg PATH --head-lock PATH
+
+Exit codes:
+ 0 every removed dep is safe (no source refs or still resolvable)
+ 1 at least one removed dep is referenced and not resolvable
+ 2 invocation error (bad args, missing file, git error)
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+FRONTEND_PKG = "studio/frontend/package.json"
+FRONTEND_LOCK = "studio/frontend/package-lock.json"
+
+DEP_FIELDS = (
+ "dependencies",
+ "devDependencies",
+ "peerDependencies",
+ "optionalDependencies",
+)
+
+# Sources where seeing a package name does NOT count as usage.
+EXPECTED_NOISE_FILES = {
+ "studio/frontend/package.json",
+ "studio/frontend/package-lock.json",
+ "studio/backend/core/data_recipe/oxc-validator/package.json",
+ "studio/backend/core/data_recipe/oxc-validator/package-lock.json",
+}
+
+# Only quoted-string occurrences in these file types can be module specifiers.
+JS_LIKE_EXT = re.compile(
+ r"\.(ts|tsx|js|jsx|mjs|cjs|html|htm|css|scss|sass|json|jsonc)$"
+)
+# Files where JS-syntactic import patterns (static/dynamic/require/re-export)
+# could be a real module reference. Markdown gets a separate gate (.mdx is
+# real ESM; .md code fences are not).
+SCRIPT_LIKE_EXT = re.compile(r"\.(ts|tsx|js|jsx|mjs|cjs|mdx)$")
+STYLE_EXT = re.compile(r"\.(css|scss|sass)$")
+HTML_EXT = re.compile(r"\.(html|htm)$")
+TS_LIKE_EXT = re.compile(r"\.(ts|tsx|mts|cts|mdx)$")
+# Files where a removed package's CLI binary could be invoked (npx, bunx,
+# yarn dlx, pnpm exec, or a bare `pkg --flag` shell call).
+COMMAND_LIKE_EXT = re.compile(r"(\.(ya?ml|sh|ps1|bat)$|(^|/)Dockerfile[^/]*$)")
+
+GREP_INCLUDES = [
+ "--include=*.ts",
+ "--include=*.tsx",
+ "--include=*.js",
+ "--include=*.jsx",
+ "--include=*.mjs",
+ "--include=*.cjs",
+ "--include=*.html",
+ "--include=*.htm",
+ "--include=*.css",
+ "--include=*.scss",
+ "--include=*.sass",
+ "--include=*.json",
+ "--include=*.jsonc",
+ "--include=*.md",
+ "--include=*.mdx",
+ "--include=*.py",
+ "--include=*.rs",
+ "--include=*.toml",
+ "--include=*.yml",
+ "--include=*.yaml",
+ "--include=*.sh",
+ "--include=*.ps1",
+ "--include=*.bat",
+ "--include=Dockerfile*",
+]
+GREP_EXCLUDES = [
+ "--exclude-dir=node_modules",
+ "--exclude-dir=dist",
+ "--exclude-dir=.git",
+ "--exclude-dir=__pycache__",
+ "--exclude-dir=target",
+ "--exclude-dir=.next",
+ "--exclude-dir=build",
+ "--exclude-dir=.venv",
+ "--exclude-dir=venv",
+]
+
+# A pip-installed playwright reference is the PyPI package, not npm.
+PIP_PLAYWRIGHT = re.compile(
+ r"(pip\s+install\s+['\"]?playwright"
+ r"|python\s+-m\s+playwright"
+ r"|from\s+playwright"
+ r"|^\s*import\s+playwright)"
+)
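+# e.g. (illustrative): "pip install playwright" and
+# "from playwright.sync_api import sync_playwright" match and are filtered
+# as PyPI references; "npx playwright install" does not match.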
+
+
+@dataclass
+class Hit:
+ file: str
+ line: int
+ kind: str
+ snippet: str
+
+
+def run(cmd: list[str], cwd: Path | None = None) -> str:
+ """Run a command, return stdout. On non-zero exit, return ''."""
+ res = subprocess.run(
+ cmd,
+ cwd = cwd or REPO_ROOT,
+ stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE,
+ text = True,
+ )
+ return res.stdout if res.returncode == 0 else ""
+
+
+def read_pkg_at(base: str, path: str) -> dict:
+ """Read JSON at `base:path` via git show. Empty dict if missing."""
+ out = run(["git", "show", f"{base}:{path}"])
+ if not out.strip():
+ return {}
+ return json.loads(out)
+
+
+def read_pkg_file(path: Path) -> dict:
+ if not path.exists():
+ return {}
+ return json.loads(path.read_text(encoding = "utf-8"))
+
+
+def all_decl_names(pkg: dict) -> set[str]:
+ names: set[str] = set()
+ for field in DEP_FIELDS:
+ names.update((pkg.get(field) or {}).keys())
+ return names
+
+
+def _resolve_install_path(parent_path: str, name: str, pkgs: dict) -> str | None:
+ """Walk up the nested node_modules chain from `parent_path` to find
+ where `name` actually resolves. Mirrors Node module resolution.
+ """
+ parts = parent_path.split("/node_modules/")
+ for i in range(len(parts), 0, -1):
+ prefix = "/node_modules/".join(parts[:i])
+ trial = (prefix + "/node_modules/" if prefix else "node_modules/") + name
+ if trial in pkgs:
+ return trial
+ if f"node_modules/{name}" in pkgs:
+ return f"node_modules/{name}"
+ return None
+
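+# Illustrative walk (hypothetical lockfile paths): resolving "rollup" from
+# parent "node_modules/vite" tries "node_modules/vite/node_modules/rollup"
+# first, then falls back to the top-level "node_modules/rollup" -- the same
+# order Node itself searches.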
+
+def _deps_of(meta: dict) -> dict:
+ """Deps npm actually installs. Optional peers are skipped: npm only
+ installs them when another package declares the same dep, so for the
+ purpose of "is this package still reachable" they cannot keep a
+ removed top-level dep alive on their own.
+ """
+ out = {}
+ for field in ("dependencies", "optionalDependencies"):
+ out.update(meta.get(field) or {})
+ peer_meta = meta.get("peerDependenciesMeta") or {}
+ for name, spec in (meta.get("peerDependencies") or {}).items():
+ if (peer_meta.get(name) or {}).get("optional"):
+ continue
+ out[name] = spec
+ return out
+
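+# e.g. (hypothetical metadata): {"peerDependencies": {"react": "*"},
+# "peerDependenciesMeta": {"react": {"optional": true}}} contributes no
+# edge for react; the same peer without the optional flag does.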
+
+def reachable_from_head(head_pkg: dict, lock: dict) -> set[str]:
+ """BFS the lockfile dep graph starting from `head_pkg`'s top-level
+ declared deps. Returns the set of lockfile install paths that survive.
+ Stale lockfile entries (orphaned by the new package.json) are excluded.
+ """
+ pkgs = lock.get("packages", {})
+ if not pkgs:
+ return set()
+ roots = all_decl_names(head_pkg)
+ seen: set[str] = set()
+ frontier: list[str] = []
+ for name in roots:
+ p = _resolve_install_path("", name, pkgs)
+ if p:
+ frontier.append(p)
+ while frontier:
+ path = frontier.pop()
+ if path in seen:
+ continue
+ seen.add(path)
+ meta = pkgs.get(path, {})
+ for dep_name in _deps_of(meta):
+ p = _resolve_install_path(path, dep_name, pkgs)
+ if p and p not in seen:
+ frontier.append(p)
+ return seen
+
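+# Toy walk (hypothetical lockfile): packages {"node_modules/a":
+# {"dependencies": {"b": "*"}}, "node_modules/b": {}} with a head
+# package.json declaring only "a" yields {"node_modules/a",
+# "node_modules/b"} -- "b" stays reachable through "a" even after being
+# dropped from package.json.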
+
+def classify(pkg: str, file: str, content: str) -> str | None:
+ """Return why `content` references `pkg`, or None.
+
+ `content` may span multiple lines (for multi-line imports/exports);
+ each pattern uses re.DOTALL where it matters. The bare-spec
+ regexes use a word-boundary check on the package name so that
+ a search for `foo` does not match inside `foobar`.
+
+ File-type gating: JS-syntactic patterns only fire on .ts/.tsx/.js/.jsx/
+ .mjs/.cjs/.mdx files, so an `import x from "pkg"` snippet inside a
+ Python test fixture or a Markdown code block is not mistaken for a
+ real npm usage. CSS patterns only fire on .css/.scss/.sass. HTML
+ patterns only fire on .html/.htm.
+ """
+ if file in EXPECTED_NOISE_FILES:
+ return None
+
+ esc = re.escape(pkg)
+ # Subpath gate: after the package name, the next char must be either
+ # the closing quote, `/`, or end-of-string. Prevents foo matching foobar.
+ sub = r"(?:/[^'\"`]*)?"
+
+ flags_dotall = re.DOTALL | re.MULTILINE
+
+ is_script = bool(SCRIPT_LIKE_EXT.search(file))
+ is_style = bool(STYLE_EXT.search(file))
+ is_html = bool(HTML_EXT.search(file))
+ is_ts = bool(TS_LIKE_EXT.search(file))
+
+ # If the file is none of script / style / html / json (which is the
+ # quoted-string fallback surface) and is not an mdx file, no classify
+ # rule applies. This is what gates out Python fixtures, Markdown code
+ # blocks, shell snippets, etc.
+ is_json = file.endswith(".json") or file.endswith(".jsonc")
+ if not (is_script or is_style or is_html or is_json):
+ return None
+
+ # CSS @import is checked first so it does not collide with the
+ # side-effect-import regex below.
+ if is_style and re.search(rf"@import\s+['\"]{esc}{sub}['\"]", content):
+ return "css_import"
+ # Static imports: handle multi-line `import { ... } from "pkg"` by
+ # allowing arbitrary content (newlines included) between `import`
+ # and `from`. The non-greedy match plus the required `from` keeps
+ # this scoped to a single statement.
+ if is_script and re.search(
+ rf"(?<![\w.])import\b[^'\"`;]*?\bfrom\s+['\"]{esc}{sub}['\"]",
+ content,
+ flags_dotall,
+ ):
+ return "static_import"
+ # Bare side-effect import: `import "pkg"` / `import "pkg/styles.css"`.
+ if is_script and re.search(rf"(?<![\w.])import\s+['\"]{esc}{sub}['\"]", content):
+ return "side_effect_import"
+ # Dynamic import() -- also covers JSDoc `@type {import("pkg")}` and
+ # TypeScript `type X = import("pkg").Y` type positions.
+ if is_script and re.search(rf"\bimport\s*\(\s*['\"]{esc}{sub}['\"]", content):
+ return "dynamic_import"
+ if is_script and re.search(
+ rf"\brequire(?:\.resolve)?\s*\(\s*['\"]{esc}{sub}['\"]", content
+ ):
+ return "require"
+ # Re-exports: `export * from "pkg"`, `export { x } from "pkg"`, and
+ # `export type { X } from "pkg"`; multi-line bodies work like imports.
+ if is_script and re.search(
+ rf"\bexport\b[^'\"`;]*?\bfrom\s+['\"]{esc}{sub}['\"]",
+ content,
+ flags_dotall,
+ ):
+ return "re_export"
+ # Template-literal bare specifier: `pkg` or `pkg/subpath`.
+ if is_script and re.search(rf"`{esc}(?:/[^`]*)?`", content):
+ return "template_literal"
+ # Worker/asset URLs: new URL("pkg/...", import.meta.url).
+ if is_script and re.search(rf"\bnew\s+URL\s*\(\s*['\"]{esc}{sub}['\"]", content):
+ return "new_url"
+ # CSS url(...) -- quoted or unquoted bare package path.
+ if is_style and re.search(rf"\burl\(\s*['\"]?{esc}{sub}['\"]?\s*\)", content):
+ return "css_url"
+ # In HTML, the package shows up as a path segment: preceded by `/` and
+ # followed by `/`, the closing quote, or end-of-value.
+ html_pkg = rf"{esc}(?:[/'\"]|$)"
+ if is_html and re.search(
+ rf"<script[^>]*src\s*=\s*['\"][^'\"]*/{html_pkg}", content
+ ):
+ return "html_script"
+ if is_html and re.search(rf"<link[^>]*href\s*=\s*['\"][^'\"]*/{html_pkg}", content):
+ return "html_link"
+ # TypeScript triple-slash
+ if is_ts and re.search(
+ rf"///\s*<reference\s+types\s*=\s*['\"]{esc}['\"]", content
+ ):
+ return "tsc_triple_slash"
+ # Fallback: a quoted string that IS the bare specifier (optionally with
+ # a subpath). Config surfaces -- tsconfig paths, vite aliases, ambient
+ # `declare module` blocks -- reference packages as plain strings.
+ if re.search(rf"['\"]{esc}{sub}['\"]", content):
+ return "string_literal"
+ return None
+
+
+def lockfile_root_sync(head_pkg: dict, head_lock: dict) -> list[str]:
+ """Return a list of warnings if package-lock.json's dep map
+ disagrees with package.json (i.e., npm install was not re-run).
+ """
+ warnings = []
+ if not head_lock:
+ return warnings
+ root = head_lock.get("packages", {}).get("", {})
+ lock_decl = {
+ **(root.get("dependencies") or {}),
+ **(root.get("devDependencies") or {}),
+ **(root.get("peerDependencies") or {}),
+ **(root.get("optionalDependencies") or {}),
+ }
+ pkg_decl = {}
+ for f in DEP_FIELDS:
+ pkg_decl.update(head_pkg.get(f) or {})
+ only_in_lock = set(lock_decl) - set(pkg_decl)
+ only_in_pkg = set(pkg_decl) - set(lock_decl)
+ if only_in_lock:
+ warnings.append(
+ f"lockfile lists deps not in package.json (lockfile stale): {sorted(only_in_lock)}"
+ )
+ if only_in_pkg:
+ warnings.append(
+ f"package.json declares deps not in lockfile (run npm install): {sorted(only_in_pkg)}"
+ )
+ return warnings
+
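+# e.g. (hypothetical): a head package.json that drops "left-pad" while the
+# lockfile's root "" entry still lists it triggers the "lockfile stale"
+# warning above.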
+
+def types_orphan_warnings(head_pkg: dict) -> list[str]:
+ """Flag @types/ deps where is no longer declared anywhere
+ in package.json. Removing X without also dropping @types/X leaves
+ dangling type packages.
+ """
+ decl = set()
+ for f in DEP_FIELDS:
+ decl.update((head_pkg.get(f) or {}).keys())
+ warnings = []
+ for name in decl:
+ if not name.startswith("@types/"):
+ continue
+ # @types/foo provides types for `foo`
+ # @types/foo-bar provides types for `foo-bar`
+ # @types/scope__pkg provides types for `@scope/pkg`
+ target = name[len("@types/") :]
+ if "__" in target:
+ scope, sub = target.split("__", 1)
+ target = f"@{scope}/{sub}"
+ if target == "node":
+ continue # Node.js types are always implicit
+ if target not in decl:
+ warnings.append(
+ f"@types/{target.replace('@', '').replace('/', '__')} present but '{target}' is not declared"
+ )
+ return warnings
+
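+# e.g. keeping "@types/lodash" after dropping "lodash" itself yields:
+# "@types/lodash present but 'lodash' is not declared".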
+
+_PKG_JSON_SKIP_KEYS = {
+ "dependencies",
+ "devDependencies",
+ "peerDependencies",
+ "optionalDependencies",
+ "bundleDependencies",
+ "bundledDependencies",
+}
+
+# Top-level fields whose contents are never package references. We walk
+# everything else recursively.
+_PKG_JSON_OPAQUE_KEYS = {
+ "browserslist", # browser queries
+ "keywords", # free-form strings
+ "engines", # node/npm version constraints
+ "engineStrict", # bool
+ "packageManager", # `pnpm@9.0.0` -- the package manager binary
+ "volta", # version pins for node/npm/yarn
+ "files", # paths included in publish
+ "directories", # paths
+ "publishConfig", # registry / access config
+ "config", # generic npm config values
+ "main",
+ "module",
+ "browser",
+ "types",
+ "typings",
+ "type",
+ "exports",
+ "imports",
+ "bin",
+ "man", # author-side fields (not consumer refs)
+ "scripts", # handled separately via scripts_bin_refs()
+ "repository",
+ "bugs",
+ "homepage",
+ "funding",
+ "author",
+ "contributors",
+ "maintainers",
+ "license",
+ "licenses",
+ "name",
+ "version",
+ "description",
+ "private",
+ "sideEffects",
+ "workspaces", # paths/globs, NOT pkg names
+}
+
+
+def package_json_extra_refs(pkg: dict, target: str) -> list[str]:
+ """Walk every key/value in package.json EXCEPT the dep declaration
+ blocks, and return citations for string values or dict keys that
+ equal `target` (or `target/subpath`).
+
+ Catches the patterns the public dep-checker tools commonly miss:
+ - `overrides` / `resolutions` / `pnpm.overrides` keys
+ - `pnpm.patchedDependencies` keys
+ - `peerDependenciesMeta` keys
+ - `prettier`: "@my/prettier-config"
+ - `eslintConfig.extends`: ["..."] / "..."
+ - `stylelint.extends` / `stylelint.plugins`
+ - `babel.presets` / `babel.plugins`
+ - `jest.preset` / `jest.setupFiles` / `jest.transform`
+ - `commitlint.extends`, `renovate.extends`, `remarkConfig.plugins`
+ """
+ target_sub = target + "/"
+ cites: list[str] = []
+
+ def matches(s: object) -> bool:
+ return isinstance(s, str) and (s == target or s.startswith(target_sub))
+
+ def walk(obj: object, path: str) -> None:
+ if isinstance(obj, dict):
+ for k, v in obj.items():
+ # Skip top-level dep declaration fields entirely.
+ if path == "" and k in _PKG_JSON_SKIP_KEYS:
+ continue
+ # Top-level fields whose contents are never package refs.
+ if path == "" and k in _PKG_JSON_OPAQUE_KEYS:
+ continue
+ # Inside `overrides` / `resolutions` / etc., the KEY itself
+ # is a package reference.
+ if matches(k):
+ cites.append(f"{path}.{k}" if path else k)
+ walk(v, f"{path}.{k}" if path else k)
+ elif isinstance(obj, list):
+ for i, v in enumerate(obj):
+ walk(v, f"{path}[{i}]")
+ elif isinstance(obj, str):
+ if matches(obj):
+ cites.append(f"{path}: {obj}")
+
+ walk(pkg, "")
+ return cites
+
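+# Illustrative walk (hypothetical manifest): for target "left-pad",
+# {"overrides": {"left-pad": "1.0.0"}, "prettier": "left-pad"} yields
+# ["overrides.left-pad", "prettier: left-pad"] -- one cite for the
+# override key, one for the string value.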
+
+def build_bin_to_pkg(head_lock: dict) -> dict[str, str]:
+ """Map a binary name (e.g. 'vite', 'tsc', 'eslint') to the package
+ that provides it. Built from each lockfile entry's `bin` field.
+ """
+ out: dict[str, str] = {}
+ if not head_lock:
+ return out
+ for path, meta in head_lock.get("packages", {}).items():
+ if not path:
+ continue
+ name = path.split("node_modules/")[-1]
+ bins = meta.get("bin")
+ if isinstance(bins, dict):
+ for binname in bins:
+ out.setdefault(binname, name)
+ elif isinstance(bins, str):
+ out.setdefault(name.split("/")[-1], name)
+ return out
+
+
+_SCRIPT_TOKENIZE = re.compile(r"\s*(?:&&|\|\||;|\|(?!\|))\s*")
+
+# Wrappers that delegate to a real CLI in the same shell word list.
+# After stripping env prefixes and (optionally) `npx`/`pnpm exec`/`yarn dlx`/
+# `bunx`, if the leading token is one of these we advance past the
+# wrapper's own flags and any further env-prefix tokens, then re-check.
+# `cross-env` is the common one; `dotenv-cli` / `dotenvx` use `--` as a
+# separator. Wrappers that operate on named npm-scripts (concurrently,
+# npm-run-all, run-s, run-p, wireit, turbo, nx) intentionally aren't
+# here -- they reference script names, not bin names, so the real bin
+# is in the *target* script's chunk which we already tokenize.
+_SCRIPT_WRAPPERS = {"cross-env", "dotenv", "dotenvx", "env-cmd"}
+_ENV_PREFIX_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
+
+
+def _next_real_bin(words: list[str], idx: int) -> str | None:
+ """Walk `words` from `idx`, peeling env-prefix tokens, the leading
+ package-manager runner (`npx`, `pnpm exec`, etc.), and the known
+ wrapper bins. Return the next token that looks like the real CLI
+ binary, or None if the chunk has nothing to look up.
+
+ Every pass either advances `idx` or returns, so the walk is bounded
+ by the chunk's word count and cannot run away on a pathological
+ wrapper chain.
+ """
+ seen_wrappers: set[str] = set()
+ while idx < len(words):
+ # 1. env-prefix run: `FOO=bar BAZ="a b" cmd ...`. shlex has
+ # already collapsed quoted values into one word, so this
+ # tokenizer is safe for them.
+ while idx < len(words) and _ENV_PREFIX_RE.match(words[idx]):
+ idx += 1
+ if idx >= len(words):
+ return None
+
+ first = words[idx]
+ # 2. Package-manager runner: `npx <bin> args`, `pnpm exec <bin>`,
+ # `yarn dlx <bin>`, `bunx <bin>`. Strip and continue (so the
+ # wrapped command goes through the same unwrap loop).
+ if first in {"npx", "pnpx", "bunx"} and idx + 1 < len(words):
+ idx += 1
+ continue
+ if (
+ first in {"pnpm", "yarn"}
+ and idx + 2 < len(words)
+ and words[idx + 1] in {"exec", "dlx"}
+ ):
+ idx += 2
+ continue
+
+ # 3. Wrapper bin (cross-env, dotenv, etc.). Skip the wrapper's
+ # own flags and any subsequent env-prefix tokens, then re-loop.
+ bin_token = first.removeprefix("./node_modules/.bin/").removeprefix(
+ "node_modules/.bin/"
+ )
+ if bin_token in _SCRIPT_WRAPPERS and bin_token not in seen_wrappers:
+ seen_wrappers.add(bin_token)
+ idx += 1
+ # cross-env / env-cmd: no flags; just more env-prefix tokens.
+ # dotenv / dotenvx: skip `-e ` style flags and the
+ # optional `--` separator before the wrapped command.
+ while idx < len(words):
+ tok = words[idx]
+ if tok.startswith("-") and tok != "--":
+ idx += 1
+ # `-e .env` style: also skip the flag's argument
+ # when it does not look like another flag.
+ if (
+ idx < len(words)
+ and not words[idx].startswith("-")
+ and not _ENV_PREFIX_RE.match(words[idx])
+ ):
+ idx += 1
+ continue
+ if tok == "--":
+ idx += 1
+ break
+ break
+ continue
+ return bin_token
+ return None
+
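+# e.g. (illustrative): _next_real_bin(["dotenv", "-e", ".env", "--",
+# "vite", "build"], 0) skips the wrapper, its `-e .env` flag pair, and
+# the `--` separator, then returns "vite".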
+
+def scripts_bin_refs(
+ head_pkg: dict, bin_to_pkg: dict[str, str]
+) -> dict[str, list[str]]:
+ """Return `{package_name: ['scripts.X: cmd', ...]}` listing every
+ package referenced via its bin name in package.json scripts.
+
+ Each script value is split on shell separators (`&&`, `||`, `;`,
+ `|`). Within each chunk, `_next_real_bin()` unwraps env prefixes,
+ package-manager runners (`npx` / `pnpm exec` / `yarn dlx` / `bunx`),
+ and wrapper bins like `cross-env` / `dotenv` so that
+ `cross-env CI=1 biome check` correctly credits `biome` to its
+ declaring package.
+
+ Tokenization uses shlex.split so quoted env values
+ (`FOO="a b" biome`) survive unbroken.
+ """
+ import shlex
+
+ scripts = head_pkg.get("scripts", {}) or {}
+ refs: dict[str, list[str]] = {}
+ for script_name, raw_cmd in scripts.items():
+ if not isinstance(raw_cmd, str):
+ continue
+ for chunk in _SCRIPT_TOKENIZE.split(raw_cmd):
+ chunk = chunk.strip()
+ if not chunk:
+ continue
+ try:
+ words = shlex.split(chunk, posix = True)
+ except ValueError:
+ # Unbalanced quotes -- fall back to plain split.
+ words = chunk.split()
+ if not words:
+ continue
+ bin_name = _next_real_bin(words, 0)
+ if bin_name is None:
+ continue
+ pkg = bin_to_pkg.get(bin_name)
+ if pkg:
+ refs.setdefault(pkg, []).append(f"scripts.{script_name}: {raw_cmd}")
+ return refs
+
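+# Illustrative unwrap (hypothetical script entry): {"scripts": {"check":
+# "cross-env CI=1 npx biome check ."}} with bin map {"biome":
+# "@biomejs/biome"} yields
+# {"@biomejs/biome": ["scripts.check: cross-env CI=1 npx biome check ."]}.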
+
+def tsconfig_compiler_types_refs() -> set[str]:
+ """Read studio/frontend/tsconfig*.json and return the set of
+ package names referenced in compilerOptions.types arrays. These are
+ implicitly loaded by tsc and count as a real use even though they
+ have no explicit import.
+ """
+ out: set[str] = set()
+ base = REPO_ROOT / "studio/frontend"
+ for name in ("tsconfig.json", "tsconfig.app.json", "tsconfig.node.json"):
+ path = base / name
+ if not path.exists():
+ continue
+ try:
+ text = path.read_text()
+ # tsconfig allows comments; strip simple line comments.
+ text = re.sub(r"//[^\n]*", "", text)
+ data = json.loads(text)
+ except (OSError, json.JSONDecodeError):
+ continue
+ types = (data.get("compilerOptions", {}) or {}).get("types", []) or []
+ for t in types:
+ if not isinstance(t, str):
+ continue
+ # `vite/client` resolves to `vite` package.
+ pkg = (
+ t.split("/", 1)[0]
+ if not t.startswith("@")
+ else "/".join(t.split("/", 2)[:2])
+ )
+ out.add(pkg)
+ return out
+
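+# e.g. (hypothetical tsconfig): `"types": ["vite/client", "@types/foo"]`
+# yields {"vite", "@types/foo"}.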
+
+def enumerate_dep_usage(head_pkg: dict, head_lock: dict) -> dict[str, list]:
+ """For every declared dep, classify whether it appears used. Returns
+ a dict with these categories:
+ - used: has at least one detected usage in src/,
+ config files, scripts.bin, package.json
+ field refs, or tsconfig types
+ - unused: no detected usage anywhere
+ - type_pkg_kept: @types/X where X is still declared
+ - type_pkg_orphan: @types/X where X is no longer declared
+ (or X is removed) -- candidate for removal
+
+ Each entry is the package name. The categorisation is opinionated;
+ `unused` is a CANDIDATE list, not a guarantee. The caller should
+ verify before deletion.
+ """
+ decl = all_decl_names(head_pkg)
+ bin_to_pkg = build_bin_to_pkg(head_lock) if head_lock else {}
+ script_refs = scripts_bin_refs(head_pkg, bin_to_pkg)
+ tsc_types = tsconfig_compiler_types_refs()
+
+ results: dict[str, list] = {
+ "used": [],
+ "unused": [],
+ "type_pkg_kept": [],
+ "type_pkg_orphan": [],
+ }
+ for name in sorted(decl):
+ if name.startswith("@types/"):
+ target = name[len("@types/") :]
+ if "__" in target:
+ scope, sub = target.split("__", 1)
+ target = f"@{scope}/{sub}"
+ if target == "node":
+ results["type_pkg_kept"].append(name)
+ elif target in decl:
+ results["type_pkg_kept"].append(name)
+ else:
+ results["type_pkg_orphan"].append(name)
+ continue
+ # Real-source-usage check
+ hits = find_usage(name)
+ used = bool(hits)
+ # CLI usage in shell / workflow / Dockerfile surfaces. Skip for
+ # `@types/*` packages because they never expose a CLI binary and
+ # the unscoped-tail bin name candidate would scan workflow files
+ # for the bare runtime name (a removed `@types/foo` would look
+ # for invocations of `foo`).
+ if not used and not name.startswith("@types/") and find_command_usage(name):
+ used = True
+ # Bin scripts
+ if not used and name in script_refs:
+ used = True
+ # package.json non-dep field references
+ if not used and package_json_extra_refs(head_pkg, name):
+ used = True
+ # tsconfig compilerOptions.types implicit usage
+ if not used and name in tsc_types:
+ used = True
+ if used:
+ results["used"].append(name)
+ else:
+ results["unused"].append(name)
+ return results
+
+
+def find_imports_without_decl(head_pkg: dict) -> list[tuple[str, int, str]]:
+ """Reverse check: find bare-specifier imports in studio/frontend/src
+ that don't correspond to any declared package.json dep. Catches the
+ case where someone adds an import but forgets the dep declaration.
+ Returns (file, line, spec) tuples.
+
+ Match shapes covered:
+ import "pkg"
+ import Foo from "pkg"
+ import { Foo } from "pkg"
+ import type { Foo } from "pkg"
+ const x = require("pkg")
+ const x = await import("pkg")
+ """
+ decl = set()
+ for f in DEP_FIELDS:
+ decl.update((head_pkg.get(f) or {}).keys())
+ # Also: anything the tsconfig path alias covers (just '@/...' here) is
+ # internal. The capture group is the specifier; the leading alternation
+ # accepts any of: `from "..."`, bare side-effect `import "..."`,
+ # `import("...")`, or `require("...")`. Relative paths are excluded by
+ # requiring the first char of the specifier to be neither `.` nor `/`;
+ # the `@/` alias is filtered in the loop below.
+ pattern = (
+ r"(?:\bfrom\s+|"
+ r"\bimport\s+(?:\(\s*)?|"
+ r"\brequire(?:\.resolve)?\(\s*)"
+ r"['\"]([^'\"./][^'\"]*)['\"]"
+ )
+ args = [
+ "grep",
+ "-rnE",
+ pattern,
+ "--include=*.ts",
+ "--include=*.tsx",
+ "--include=*.js",
+ "--include=*.jsx",
+ "studio/frontend/src",
+ ]
+ out = run(args)
+ missing = []
+ for line in out.splitlines():
+ m = re.match(r"^(?:\./)?([^:]+):(\d+):(.*)$", line)
+ if not m:
+ continue
+ file, ln, content = m.group(1), int(m.group(2)), m.group(3)
+ for spec_match in re.finditer(pattern, content):
+ spec = spec_match.group(1)
+ # Resolve to package name (strip subpath)
+ if spec.startswith("@"):
+ parts = spec.split("/", 2)
+ pkg_name = "/".join(parts[:2]) if len(parts) >= 2 else spec
+ else:
+ pkg_name = spec.split("/", 1)[0]
+ if pkg_name in decl:
+ continue
+ # Internal tsconfig aliases like '@/foo' are not npm packages.
+ if spec.startswith("@/") or pkg_name == "@":
+ continue
+ # Node builtins, with or without the `node:` scheme prefix.
+ if pkg_name.startswith("node:") or pkg_name in {
+ "fs",
+ "path",
+ "url",
+ "stream",
+ "crypto",
+ "buffer",
+ "util",
+ "events",
+ "child_process",
+ }:
+ continue
+ missing.append((file, ln, spec))
+ return missing
+
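+# e.g. (illustrative): a src file containing `import pad from "left-pad"`
+# with no "left-pad" declaration yields (file, line, "left-pad").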
+
+def grep_repo(pat: str) -> list[tuple[str, int, str]]:
+ args = ["grep", "-rnE", pat] + GREP_INCLUDES + GREP_EXCLUDES + ["."]
+ out = run(args)
+ rows = []
+ for line in out.splitlines():
+ m = re.match(r"^(\./)?([^:]+):(\d+):(.*)$", line)
+ if m:
+ rows.append((m.group(2), int(m.group(3)), m.group(4)))
+ return rows
+
+
+_file_lines_cache: dict[str, list[str]] = {}
+
+
+def _read_file(path: str) -> list[str]:
+ if path not in _file_lines_cache:
+ try:
+ _file_lines_cache[path] = (
+ Path(path).read_text(errors = "replace").splitlines()
+ )
+ except (OSError, UnicodeDecodeError):
+ _file_lines_cache[path] = []
+ return _file_lines_cache[path]
+
+
+def find_usage(pkg: str) -> list[Hit]:
+ """Return real usages of `pkg`. Filters pip-playwright separately.
+
+ For each filename returned by grep, also feed a multi-line window
+ around the matching line into classify() so multi-line imports
+ (`import {\n a\n} from "pkg"`) get picked up.
+ """
+ rows = grep_repo(re.escape(pkg))
+ hits = []
+ seen_keys: set[tuple[str, str]] = set()
+ for file, lineno, content in rows:
+ if pkg == "playwright" and PIP_PLAYWRIGHT.search(content):
+ continue
+ # Try the single-line classify first.
+ kind = classify(pkg, file, content)
+ if not kind:
+ # Multi-line window: a generous 25 lines above + the line +
+ # 25 below so Prettier's one-import-per-line formatting for
+ # 12-20+ named imports still includes the `import` keyword
+ # in the same window as the `from "pkg"` clause.
+ lines = _read_file(file)
+ lo = max(0, lineno - 26)
+ hi = min(len(lines), lineno + 25)
+ window = "\n".join(lines[lo:hi])
+ kind = classify(pkg, file, window)
+ if kind:
+ key = (file, kind)
+ if key in seen_keys:
+ continue
+ seen_keys.add(key)
+ hits.append(Hit(file, lineno, kind, content[:160]))
+ return hits
+
+
+def _candidate_bin_names(pkg: str) -> set[str]:
+ """Names a removed package's CLI could be invoked under in shell
+ scripts and workflow files. Most npm CLIs use the package name
+ (`vite`, `eslint`, `playwright`); scoped CLI packages commonly
+ expose an unscoped binary name (`@biomejs/biome` -> `biome`).
+ """
+ return {pkg, pkg.rsplit("/", 1)[-1]}
+
+
+def find_command_usage(pkg: str) -> list[Hit]:
+ """Find package CLI invocations in shell / workflow / Dockerfile
+ surfaces: `npx pkg`, `bunx pkg`, `pnpm exec pkg`, `yarn dlx pkg`,
+ or a bare `pkg --flag`. Returns Hit("command_bin").
+
+ Detection is bounded to COMMAND_LIKE_EXT files so a JS string that
+ happens to contain `npx foo` inside a TS test fixture is not
+ mistaken for a real invocation.
+ """
+ bins = sorted(_candidate_bin_names(pkg), key = len, reverse = True)
+ esc_bins = "|".join(re.escape(b) for b in bins)
+ # grep ERE pattern (POSIX classes for whitespace/word boundaries).
+ # Build without f-strings to avoid f-string-vs-{} confusion with the
+ # POSIX `[[:space:]]` literals and trailing `})}` boundary class.
+ grep_pat = (
+ r"(^|[[:space:]:;&|(\[])"
+ r"(npx[[:space:]]+|pnpm[[:space:]]+exec[[:space:]]+"
+ r"|yarn[[:space:]]+(dlx[[:space:]]+)?|bunx[[:space:]]+)?"
+ r"(" + esc_bins + r")"
+ r"([[:space:])};|\]]|$)"
+ )
+ py_pat = re.compile(
+ r"(^|[\s:;&|(\[])"
+ r"(?:npx\s+|pnpm\s+exec\s+|yarn\s+(?:dlx\s+)?|bunx\s+)?"
+ r"(" + esc_bins + r")"
+ r"([\s)};|\]]|$)"
+ )
+ hits: list[Hit] = []
+ seen: set[tuple[str, int]] = set()
+ for file, lineno, content in grep_repo(grep_pat):
+ if not COMMAND_LIKE_EXT.search(file):
+ continue
+ if pkg == "playwright" and PIP_PLAYWRIGHT.search(content):
+ continue
+ if not py_pat.search(content):
+ continue
+ key = (file, lineno)
+ if key in seen:
+ continue
+ seen.add(key)
+ hits.append(Hit(file, lineno, "command_bin", content[:160]))
+ return hits
+
+
+def types_target_name(pkg: str) -> str | None:
+ """Strip `@types/` prefix and decode the npm scope-encoding so the
+ return value matches the runtime package name. `@types/foo` -> `foo`,
+ `@types/foo__bar` -> `@foo/bar`. Returns None for non-@types packages.
+ """
+ if not pkg.startswith("@types/"):
+ return None
+ target = pkg[len("@types/") :]
+ if "__" in target:
+ scope, sub = target.split("__", 1)
+ return f"@{scope}/{sub}"
+ return target
+
+
+def find_types_runtime_usage(pkg: str, tsc_types: set[str]) -> list[Hit]:
+ """For a removed `@types/X`, find usages of `X` itself: explicit
+ `/// `, `tsconfig.compilerOptions.types: ["X"]`,
+ and runtime `import "X"` shapes. The whole point of `@types/X` is to
+ type one of those; if any are present, the type package must stay.
+ """
+ target = types_target_name(pkg)
+ if target is None:
+ return []
+ hits = find_usage(target)
+ if target in tsc_types:
+ hits.append(
+ Hit(
+ "studio/frontend/tsconfig*.json",
+ 0,
+ "tsconfig_types",
+ f'compilerOptions.types includes "{target}"',
+ )
+ )
+ return hits
+
+
+def main() -> int:
+ p = argparse.ArgumentParser(
+ description = __doc__, formatter_class = argparse.RawTextHelpFormatter
+ )
+ p.add_argument(
+ "--base",
+ default = "origin/main",
+ help = "git ref to diff against (default: origin/main). "
+ "Examples: HEAD~1, main, a-tag, a-sha.",
+ )
+ p.add_argument(
+ "--base-pkg", help = "optional override: read base package.json from this path"
+ )
+ p.add_argument(
+ "--base-lock",
+ help = "optional override: read base package-lock.json from this path. "
+ "Used to recover the bin -> package mapping for removed packages so "
+ "scripts.foo still flags as a usage even after the PR drops node_modules/foo.",
+ )
+ p.add_argument(
+ "--head-pkg",
+ default = str(REPO_ROOT / FRONTEND_PKG),
+ help = "head package.json path (default: working tree)",
+ )
+ p.add_argument(
+ "--head-lock",
+ default = str(REPO_ROOT / FRONTEND_LOCK),
+ help = "head lockfile path (default: working tree). "
+ "Reachability analysis runs against this lockfile.",
+ )
+ p.add_argument("--verbose", action = "store_true")
+ p.add_argument(
+ "--strict",
+ action = "store_true",
+ help = "Also fail on hygiene warnings (lockfile sync, "
+ "@types orphans, imports without declared dep, unused deps).",
+ )
+ p.add_argument(
+ "--enumerate-dead",
+ action = "store_true",
+ help = "Print every declared dep that appears unused anywhere "
+ "in the repo. Informational; does not fail unless --strict.",
+ )
+ args = p.parse_args()
+
+ if args.base_pkg:
+ base_pkg = read_pkg_file(Path(args.base_pkg))
+ else:
+ base_pkg = read_pkg_at(args.base, FRONTEND_PKG)
+ head_pkg = read_pkg_file(Path(args.head_pkg))
+ if not base_pkg:
+ print(
+ f"ERROR: could not read base package.json at {args.base}:{FRONTEND_PKG}",
+ file = sys.stderr,
+ )
+ return 2
+ if not head_pkg:
+ print(
+ f"ERROR: could not read head package.json at {args.head_pkg}",
+ file = sys.stderr,
+ )
+ return 2
+
+ head_lock_path = Path(args.head_lock)
+ if not head_lock_path.exists():
+ print(
+ f"ERROR: head lockfile not found at {head_lock_path}",
+ file = sys.stderr,
+ )
+ return 2
+ head_lock = read_pkg_file(head_lock_path)
+
+ # Base lockfile is best-effort. We use it only to recover the
+ # bin -> package mapping for packages the PR is removing -- so a
+ # `scripts.biome:check` cite still fires when `@biomejs/biome` is
+ # being dropped and the head lockfile no longer has it.
+ if args.base_lock:
+ base_lock_path = Path(args.base_lock)
+ base_lock = read_pkg_file(base_lock_path) if base_lock_path.exists() else {}
+ else:
+ base_lock = read_pkg_at(args.base, FRONTEND_LOCK)
+
+ base_names = all_decl_names(base_pkg)
+ head_names = all_decl_names(head_pkg)
+ removed = sorted(base_names - head_names)
+
+ # All hygiene checks compute up front so they can run on both the
+ # removal-present and removal-empty paths (so `--strict` actually
+ # fails when only hygiene issues exist).
+ sync_warns = lockfile_root_sync(head_pkg, head_lock)
+ types_warns = types_orphan_warnings(head_pkg)
+ missing_imports = find_imports_without_decl(head_pkg)
+ enum = enumerate_dep_usage(head_pkg, head_lock) if args.enumerate_dead else None
+
+ def _print_hygiene() -> None:
+ if sync_warns:
+ print("Lockfile sync warnings:")
+ for w in sync_warns:
+ print(f" - {w}")
+ print()
+ if types_warns:
+ print("@types orphan warnings:")
+ for w in types_warns:
+ print(f" - {w}")
+ print()
+ if missing_imports:
+ print(
+ f"Imports without a matching package.json dep ({len(missing_imports)}):"
+ )
+ for file, ln, spec in missing_imports[:20]:
+ print(f" - {file}:{ln} imports '{spec}'")
+ print()
+ if enum is not None:
+ print("Dead-dep enumeration:")
+ if enum["unused"]:
+ print(f" unused ({len(enum['unused'])}):")
+ for n in enum["unused"]:
+ print(f" - {n}")
+ else:
+ print(" unused: none")
+ if enum["type_pkg_orphan"]:
+ print(f" type_pkg_orphan ({len(enum['type_pkg_orphan'])}):")
+ for n in enum["type_pkg_orphan"]:
+ print(f" - {n}")
+ if args.verbose:
+ print(f" used: {len(enum['used'])}")
+ print(f" type_pkg_kept: {len(enum['type_pkg_kept'])}")
+ print()
+
+ hygiene_strict_fail = args.strict and (
+ sync_warns
+ or types_warns
+ or missing_imports
+ or (enum is not None and (enum["unused"] or enum["type_pkg_orphan"]))
+ )
+
+ if not removed:
+ print("[OK] no dependencies removed from studio/frontend/package.json")
+ if args.enumerate_dead or sync_warns or types_warns or missing_imports:
+ print()
+ _print_hygiene()
+ if hygiene_strict_fail:
+ print("FAIL (--strict): one or more hygiene warnings present")
+ return 1
+ return 0
+
+ print(
+ f"Checking {len(removed)} removed package(s) from studio/frontend/package.json"
+ )
+ print(f"Base: {args.base} Head: working tree")
+ print()
+
+ reachable_paths = reachable_from_head(head_pkg, head_lock) if head_lock else set()
+ # bin -> package map: start from the head lockfile, then layer the
+ # base lockfile's entries on top for packages this PR is removing.
+ # A correct removal updates the head lockfile to drop node_modules/foo,
+ # so build_bin_to_pkg(head_lock) loses the mapping; we recover it
+ # from the base lockfile so `scripts.biome:check` still flags as a
+ # usage when `@biomejs/biome` is being dropped.
+ bin_to_pkg = build_bin_to_pkg(head_lock) if head_lock else {}
+ base_bin_to_pkg = build_bin_to_pkg(base_lock) if base_lock else {}
+ removed_set = set(removed)
+ for bin_name, pkg_name in base_bin_to_pkg.items():
+ if pkg_name in removed_set:
+ bin_to_pkg.setdefault(bin_name, pkg_name)
+ script_refs = scripts_bin_refs(head_pkg, bin_to_pkg)
+ tsc_types = tsconfig_compiler_types_refs()
+
+ def reachable_install_paths(name: str) -> tuple[str | None, list[str]]:
+ """Return (top_level_path, nested_paths). top_level is what bare
+ `import "name"` from src/ actually resolves to; nested copies are
+ only visible inside the parent package that nested them.
+ """
+ top = f"node_modules/{name}"
+ top_path = top if top in reachable_paths else None
+ nested = sorted(
+ p
+ for p in reachable_paths
+ if p != top and p.endswith(f"/node_modules/{name}")
+ )
+ return top_path, nested
+
+ failures: list[tuple[str, list[Hit]]] = []
+ for name in removed:
+ hits = find_usage(name)
+ # CLI invocations in shell scripts / workflows / Dockerfiles.
+ hits.extend(find_command_usage(name))
+ # @types/X is "used" if X is referenced as a type or as a
+ # runtime import elsewhere in the repo.
+ hits.extend(find_types_runtime_usage(name, tsc_types))
+ for cite in script_refs.get(name, []):
+ hits.append(Hit("studio/frontend/package.json", 0, "script_bin", cite))
+ for cite in package_json_extra_refs(head_pkg, name):
+ hits.append(Hit("studio/frontend/package.json", 0, "pkg_json_field", cite))
+ top, nested = reachable_install_paths(name)
+ importable_top_level = top is not None
+ # Source imports of bare specifier `name` resolve ONLY to top-level
+ # node_modules/. Nested copies under another package are
+ # invisible to src/ files.
+ if hits and not importable_top_level:
+ status = "FAIL"
+ elif hits and importable_top_level:
+ status = "OK-via-transitive"
+ else:
+ status = "OK"
+ print(f" [{status}] {name}")
+ if top:
+ print(f" reachable (top-level): {top}")
+ if nested:
+ print(
+ f" reachable (nested, NOT importable from src/): {nested[0]}"
+ + (f" (+{len(nested)-1} more)" if len(nested) > 1 else "")
+ )
+ if hits:
+ for h in hits[:5]:
+ print(f" [{h.kind}] {h.file}:{h.line} {h.snippet}")
+ if status == "FAIL":
+ failures.append((name, hits))
+ if args.verbose and not hits and not (top or nested):
+ print(" no references, not reachable -- clean removal")
+
+ print()
+
+ _print_hygiene()
+
+ if failures:
+ print(
+ f"FAIL: {len(failures)} removed package(s) still referenced and not resolvable"
+ )
+ for name, _ in failures:
+ print(f" - {name}")
+ return 1
+ if hygiene_strict_fail:
+ print("FAIL (--strict): one or more hygiene warnings present")
+ return 1
+
+ print("PASS: all removed packages are safe to drop")
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py
new file mode 100644
index 000000000..a8e4cda8b
--- /dev/null
+++ b/tests/studio/test_frontend_dep_removal.py
@@ -0,0 +1,1628 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+"""Edge-case suite for scripts/check_frontend_dep_removal.py.
+
+Each case patches a copy of studio/frontend/package.json to remove (or
+move) a specific dependency, invokes the checker against the real
+working tree's lockfile, and asserts the verdict matches expectations.
+
+Run:
+ python tests/studio/test_frontend_dep_removal.py
+
+Exits 0 iff every case behaves as expected.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[2]
+HEAD_PKG = REPO / "studio/frontend/package.json"
+HEAD_LOCK = REPO / "studio/frontend/package-lock.json"
+SCRIPT = REPO / "scripts/check_frontend_dep_removal.py"
+
+
+@dataclass
+class Case:
+ id: str
+ desc: str
+ remove: list[str]
+ expected_status: str # "PASS" | "FAIL"
+ expected_failures: list[str]
+ move_to_dev: list[str] | None = None # rare: deps moved, not removed
+
+
+CASES: list[Case] = [
+ Case(
+ "C1",
+ "removing next-themes breaks 2 src imports",
+ ["next-themes"],
+ "FAIL",
+ ["next-themes"],
+ ),
+ Case(
+ "C2",
+ "removing @xyflow/react breaks recipe-studio src imports "
+ "(no other declared dep pulls @xyflow/react)",
+ ["@xyflow/react"],
+ "FAIL",
+ ["@xyflow/react"],
+ ),
+ Case(
+ "C3",
+ "removing katex is safe: streamdown/math, mermaid, "
+ "rehype-katex all keep it at top level",
+ ["katex"],
+ "PASS",
+ [],
+ ),
+ Case("C4", "removing clsx is safe: streamdown keeps it", ["clsx"], "PASS", []),
+ Case(
+ "C5",
+ "removing react is safe: peer of countless packages",
+ ["react"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C6",
+ "removing @radix-ui/react-slot is safe: pulled by "
+ "radix-ui umbrella + @assistant-ui/react",
+ ["@radix-ui/react-slot"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C7",
+ "removing zustand is safe: @assistant-ui/react keeps "
+ "top-level zustand@5.x (nested xyflow 4.x is irrelevant "
+ "to src imports)",
+ ["zustand"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C8",
+ "multi-remove with mixed safety: next-themes + "
+ "@huggingface/hub + dexie all unsafe",
+ ["next-themes", "@huggingface/hub", "dexie"],
+ "FAIL",
+ ["next-themes", "@huggingface/hub", "dexie"],
+ ),
+ Case(
+ "C9",
+ "removing @huggingface/hub breaks 5+ src imports",
+ ["@huggingface/hub"],
+ "FAIL",
+ ["@huggingface/hub"],
+ ),
+ Case(
+ "C10",
+ "removing tailwind-merge is safe: streamdown keeps it",
+ ["tailwind-merge"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C11",
+ "removing a non-existent name is a no-op",
+ ["__never_existed_in_pkg__"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C12",
+ "moving @hugeicons/react from deps to devDeps is NOT a "
+ "removal (still declared)",
+ [],
+ "PASS",
+ [],
+ move_to_dev = ["@hugeicons/react"],
+ ),
+ Case(
+ "C13",
+ "removing @huggingface/hub AND @xyflow/react together: both "
+ "are root-only deps with no other parents, so both should FAIL",
+ ["@huggingface/hub", "@xyflow/react"],
+ "FAIL",
+ ["@huggingface/hub", "@xyflow/react"],
+ ),
+ Case(
+ "C14",
+ "removing dexie breaks src imports (no other declared " "dep needs it)",
+ ["dexie"],
+ "FAIL",
+ ["dexie"],
+ ),
+ Case(
+ "C15",
+ "removing motion (used in 20+ src imports including "
+ "framer-motion-style animations); no transitive parent",
+ ["motion"],
+ "FAIL",
+ ["motion"],
+ ),
+ Case(
+ "C16",
+ "removing canvas-confetti (imported in confetti.tsx); " "no transitive parent",
+ ["canvas-confetti"],
+ "FAIL",
+ ["canvas-confetti"],
+ ),
+ Case(
+ "C17",
+ "removing recharts (imported in chart.tsx); no transitive " "parent",
+ ["recharts"],
+ "FAIL",
+ ["recharts"],
+ ),
+ Case(
+ "C18",
+ "removing js-yaml is safe: @eslint/eslintrc keeps it "
+ "(triggers @types/js-yaml orphan warning, non-fatal)",
+ ["js-yaml"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C19",
+ "removing node-forge (imported in providers-api.ts); " "no transitive parent",
+ ["node-forge"],
+ "FAIL",
+ ["node-forge"],
+ ),
+ Case(
+ "C20",
+ "removing @tauri-apps/api is safe: all 5 @tauri-apps "
+ "plugins declare it as a direct dep",
+ ["@tauri-apps/api"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C21",
+ "removing mammoth (imported in runtime-provider.tsx); " "no transitive parent",
+ ["mammoth"],
+ "FAIL",
+ ["mammoth"],
+ ),
+ Case(
+ "C22",
+ "removing unpdf (imported in runtime-provider.tsx); " "no transitive parent",
+ ["unpdf"],
+ "FAIL",
+ ["unpdf"],
+ ),
+ Case(
+ "C23",
+ "removing remark-gfm is safe: streamdown declares it " "as a direct dep",
+ ["remark-gfm"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C24",
+ "removing date-fns is safe: react-day-picker and "
+ "@base-ui/react both declare it as a direct dep",
+ ["date-fns"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C25",
+ "removing vite is safe: @vitejs/plugin-react and @tailwindcss/vite "
+ "keep it via peer (bin still resolves)",
+ ["vite"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C26",
+ "removing typescript is safe: 11 transitive @typescript-eslint/* "
+ "parents keep tsc bin alive",
+ ["typescript"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C27",
+ "removing eslint is safe: typescript-eslint and eslint-plugin-* "
+ "peers keep eslint bin alive",
+ ["eslint"],
+ "PASS",
+ [],
+ ),
+ Case(
+ "C28",
+ "removing @biomejs/biome breaks scripts.biome:check / biome:fix "
+ "(no transitive parents, biome bin orphans)",
+ ["@biomejs/biome"],
+ "FAIL",
+ ["@biomejs/biome"],
+ ),
+ Case(
+ "C29",
+ "removing both @biomejs/biome AND @vitejs/plugin-react together: "
+ "biome dies outright; vite loses one of its two retained peers "
+ "but @tailwindcss/vite still keeps it",
+ ["@biomejs/biome", "@vitejs/plugin-react"],
+ "FAIL",
+ ["@biomejs/biome", "@vitejs/plugin-react"],
+ ),
+]
+
+
+def synth_head(head_pkg: dict, case: Case) -> dict:
+ out = json.loads(json.dumps(head_pkg))
+ for name in case.remove:
+ for field in (
+ "dependencies",
+ "devDependencies",
+ "peerDependencies",
+ "optionalDependencies",
+ ):
+ (out.get(field) or {}).pop(name, None)
+ if case.move_to_dev:
+ for name in case.move_to_dev:
+ v = (out.get("dependencies") or {}).pop(name, None)
+ if v is not None:
+ out.setdefault("devDependencies", {})[name] = v
+ return out
+
+
+def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]:
+ synth = synth_head(head_pkg, case)
+ with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+ json.dump(synth, f, indent = 2)
+ synth_path = f.name
+ try:
+ proc = subprocess.run(
+ [
+ sys.executable,
+ str(SCRIPT),
+ "--base-pkg",
+ str(HEAD_PKG),
+ "--head-pkg",
+ synth_path,
+ "--head-lock",
+ str(HEAD_LOCK),
+ ],
+ capture_output = True,
+ text = True,
+ )
+ finally:
+ os.unlink(synth_path)
+
+ actual_status = {0: "PASS", 1: "FAIL"}.get(proc.returncode, f"RC{proc.returncode}")
+ failure_pkgs: list[str] = []
+ in_summary = False
+ for line in proc.stdout.splitlines():
+ if "FAIL:" in line and "removed package" in line:
+ in_summary = True
+ continue
+ if in_summary and line.strip().startswith("- "):
+ failure_pkgs.append(line.strip()[2:])
+
+ ok = actual_status == case.expected_status and set(failure_pkgs) == set(
+ case.expected_failures
+ )
+ return ok, (
+ f"expected: status={case.expected_status} fails={sorted(case.expected_failures)}\n"
+ f"actual: status={actual_status} fails={sorted(failure_pkgs)}\n"
+ f"--- stdout (first 30 lines) ---\n" + "\n".join(proc.stdout.splitlines()[:30])
+ )
+
+
+# ---------------------------------------------------------------------------
+# Classifier unit tests: feed hand-crafted snippets directly into classify()
+# and assert the returned kind. Covers sneaky import shapes that an
+# adversarial / careless dev might use to obscure a real usage.
+# ---------------------------------------------------------------------------
+
+# Import the script's classify() by file path so this test does not need
+# the package to be installed.
+import importlib.util as _ilu
+
+_spec = _ilu.spec_from_file_location("_dep_check", str(SCRIPT))
+_dep_check = _ilu.module_from_spec(_spec)
+sys.modules["_dep_check"] = _dep_check # required so @dataclass can resolve annotations
+_spec.loader.exec_module(_dep_check)
+classify = _dep_check.classify
+_next_real_bin = _dep_check._next_real_bin
+scripts_bin_refs = _dep_check.scripts_bin_refs
+
+
+@dataclass
+class ClassifyCase:
+ id: str
+ desc: str
+ pkg: str
+ file: str
+ content: str
+ expected_kind: str | None # None means "no detection"
+
+
+CLASSIFY_CASES: list[ClassifyCase] = [
+ # Bog-standard shapes
+ ClassifyCase(
+ "U01",
+ "single-line static import",
+ "next-themes",
+ "src/x.tsx",
+ 'import { ThemeProvider } from "next-themes";',
+ "static_import",
+ ),
+ ClassifyCase(
+ "U02",
+ "side-effect import",
+ "katex",
+ "src/x.tsx",
+ 'import "katex/dist/katex.min.css";',
+ "side_effect_import",
+ ),
+ ClassifyCase(
+ "U03",
+ "dynamic import",
+ "@tauri-apps/api",
+ "src/x.tsx",
+ 'const { x } = await import("@tauri-apps/api/window");',
+ "dynamic_import",
+ ),
+ ClassifyCase(
+ "U04",
+ "require()",
+ "lodash",
+ "src/x.js",
+ 'const _ = require("lodash");',
+ "require",
+ ),
+ ClassifyCase(
+ "U05",
+ "CSS @import",
+ "tailwindcss",
+ "src/x.css",
+ '@import "tailwindcss";',
+ "css_import",
+ ),
+ # Sneaky shapes
+ ClassifyCase(
+ "U06",
+ "multi-line static import",
+ "next-themes",
+ "src/x.tsx",
+ 'import {\n ThemeProvider,\n useTheme,\n} from "next-themes";',
+ "static_import",
+ ),
+ ClassifyCase(
+ "U07",
+ "import type",
+ "@huggingface/hub",
+ "src/x.ts",
+ 'import type { PipelineType } from "@huggingface/hub";',
+ "static_import",
+ ),
+ ClassifyCase(
+ "U08",
+ "export * from re-export",
+ "@some-org/secrets",
+ "src/x.ts",
+ 'export * from "@some-org/secrets";',
+ "re_export",
+ ),
+ ClassifyCase(
+ "U09",
+ "export { x } from re-export",
+ "lodash-es",
+ "src/x.ts",
+ 'export { foo, bar } from "lodash-es";',
+ "re_export",
+ ),
+ ClassifyCase(
+ "U10",
+ "export type ... from re-export",
+ "@huggingface/hub",
+ "src/x.ts",
+ 'export type { Foo } from "@huggingface/hub";',
+ "re_export",
+ ),
+ ClassifyCase(
+ "U11",
+ "multi-line export from re-export",
+ "@some/pkg",
+ "src/x.ts",
+ 'export {\n thing,\n other,\n} from "@some/pkg";',
+ "re_export",
+ ),
+ ClassifyCase(
+ "U12",
+ "JSDoc @import",
+ "react",
+ "src/x.ts",
+ '/** @type {import("react").FC} */\nconst Foo = () => null;',
+ "dynamic_import",
+ ),
+ ClassifyCase(
+ "U13",
+ "template literal package path",
+ "@assistant-ui/react",
+ "src/x.tsx",
+ "const url = `@assistant-ui/react`;",
+ "template_literal",
+ ),
+ ClassifyCase(
+ "U14",
+ "new URL import-meta",
+ "monaco-editor",
+ "src/x.ts",
+ 'new URL("monaco-editor/esm/vs/editor/editor.worker", import.meta.url);',
+ "new_url",
+ ),
+ ClassifyCase(
+ "U15",
+ "tsc triple-slash type ref",
+ "@types/some-pkg",
+ "src/x.ts",
+ '/// <reference types="@types/some-pkg" />',
+ "tsc_triple_slash",
+ ),
+ ClassifyCase(
+ "U16",
+ "HTML script src",
+ "alpinejs",
+ "index.html",
+ '<script src="/node_modules/alpinejs/dist/cdn.min.js"></script>',
+ "html_script",
+ ),
+ ClassifyCase(
+ "U17",
+ "HTML link href",
+ "alpinejs",
+ "index.html",
+ '<link rel="stylesheet" href="/node_modules/alpinejs/dist/alpine.css">',
+ "html_link",
+ ),
+ ClassifyCase(
+ "U18",
+ "bare quoted string in tsconfig paths",
+ "@huggingface/hub",
+ "tsconfig.json",
+ '"paths": { "hf": ["@huggingface/hub/*"] }',
+ "string_literal",
+ ),
+ ClassifyCase(
+ "U19",
+ "vite alias key",
+ "@dagrejs/dagre",
+ "vite.config.ts",
+ '"@dagrejs/dagre": path.resolve(__dirname, "./..."),',
+ "string_literal",
+ ),
+ # False-positive guards (these should NOT detect)
+ ClassifyCase(
+ "U20",
+ "different package with shared prefix",
+ "foo",
+ "src/x.ts",
+ 'import { x } from "foobar";',
+ None,
+ ),
+ ClassifyCase(
+ "U21",
+ "package mentioned in plain comment text",
+ "react",
+ "src/x.ts",
+ "// We migrated from react-router to tanstack-router",
+ None,
+ ),
+ ClassifyCase(
+ "U22",
+ "package name as a URL path tail is NOT detected "
+ "(boundary rule: pkg must be followed by quote or `/`)",
+ "react",
+ "src/x.ts",
+ 'const docs = "https://example.com/react";',
+ None,
+ ),
+ ClassifyCase(
+ "U23",
+ "package name in Python file (ignored, "
+ "Python can never import npm packages)",
+ "playwright",
+ "tests/x.py",
+ 'label: str = "playwright"',
+ None,
+ ),
+ ClassifyCase(
+ "U24",
+ "exact-prefix collision: pkg 'lodash' and 'lodash-es'",
+ "lodash",
+ "src/x.ts",
+ 'import _ from "lodash-es";',
+ None,
+ ),
+ ClassifyCase(
+ "U25",
+ "scoped pkg substring collision",
+ "@radix-ui/react-label",
+ "src/x.ts",
+ 'import x from "@radix-ui/react-label-extra";',
+ None,
+ ),
+ ClassifyCase(
+ "U26",
+ "package only mentioned in a markdown link",
+ "react",
+ "README.md",
+ "See [react](https://react.dev).",
+ None,
+ ),
+ ClassifyCase(
+ "U27",
+ "side-effect import with subpath",
+ "katex",
+ "src/x.css",
+ '@import "katex/dist/katex.min.css";',
+ "css_import",
+ ),
+ ClassifyCase(
+ "U28",
+ "require.resolve",
+ "lodash",
+ "build/x.cjs",
+ 'const path = require.resolve("lodash/fp");',
+ "require",
+ ),
+ ClassifyCase(
+ "U29",
+ "TypeScript ambient `declare module`",
+ "@tanstack/react-router",
+ "src/app/router.tsx",
+ 'declare module "@tanstack/react-router" {\n interface X {}\n}',
+ "string_literal",
+ ),
+ ClassifyCase(
+ "U30",
+ "namespace import `import * as X from pkg`",
+ "@radix-ui/react-slot",
+ "src/x.tsx",
+ 'import * as Slot from "@radix-ui/react-slot";',
+ "static_import",
+ ),
+ ClassifyCase(
+ "U31",
+ "combined default + named import",
+ "react",
+ "src/x.tsx",
+ 'import React, { useState } from "react";',
+ "static_import",
+ ),
+ ClassifyCase(
+ "U32",
+ "default-as-named import alias",
+ "react",
+ "src/x.tsx",
+ 'import { default as R } from "react";',
+ "static_import",
+ ),
+ ClassifyCase(
+ "U33",
+ "re-export default",
+ "lodash",
+ "src/x.ts",
+ 'export { default } from "lodash";',
+ "re_export",
+ ),
+ ClassifyCase(
+ "U34",
+ "re-export default as alias",
+ "lodash",
+ "src/x.ts",
+ 'export { default as _ } from "lodash";',
+ "re_export",
+ ),
+ ClassifyCase(
+ "U35",
+ ".then() dynamic import (no await)",
+ "@tauri-apps/api",
+ "src/x.ts",
+ 'import("@tauri-apps/api/window").then(m => m.x());',
+ "dynamic_import",
+ ),
+ ClassifyCase(
+ "U36",
+ "TypeScript import() in type position",
+ "react",
+ "src/x.ts",
+ 'type C = import("react").ComponentType;',
+ "dynamic_import",
+ ),
+ # File-type gating (codex P1: JS classifiers must not fire on
+ # non-script files). Python fixtures and Markdown code blocks often
+ # contain literal JS-shaped strings for documentation or test data,
+ # so a bare `import x from "pkg"` inside a .py / .md / .sh / .yml is
+ # not a real npm usage.
+ ClassifyCase(
+ "U37",
+ "JS import snippet inside a Python fixture string is NOT a usage",
+ "next-themes",
+ "tests/studio/something.py",
+ "snippet = 'import x from \"next-themes\";'",
+ None,
+ ),
+ ClassifyCase(
+ "U38",
+ "JS import snippet inside a Markdown code fence is NOT a usage",
+ "next-themes",
+ "docs/example.md",
+ '```ts\nimport x from "next-themes";\n```',
+ None,
+ ),
+ ClassifyCase(
+ "U39",
+ "JS import inside a shell script is NOT classified as a JS usage",
+ "next-themes",
+ "scripts/build.sh",
+ 'echo "import x from \\"next-themes\\";"',
+ None,
+ ),
+ ClassifyCase(
+ "U40",
+ "JS import inside a YAML workflow is NOT classified as a JS usage",
+ "next-themes",
+ ".github/workflows/x.yml",
+ "run: echo 'import x from \"next-themes\";'",
+ None,
+ ),
+ # HTML script/link must respect package-name boundaries: a
+ # `/node_modules/foo-extra/...` reference does NOT use `foo`.
+ ClassifyCase(
+ "U41",
+ "HTML ',
+ None,
+ ),
+ ClassifyCase(
+ "U42",
+ "HTML with similar-prefix package is NOT a match",
+ "foo",
+ "index.html",
+ '',
+ None,
+ ),
+ ClassifyCase(
+ "U43",
+ "HTML ',
+ "html_script",
+ ),
+ # CSS url() unquoted variant -- valid CSS, must classify the same
+ # as the quoted variant.
+ ClassifyCase(
+ "U44",
+ "CSS url() unquoted bare package path",
+ "katex",
+ "src/x.css",
+ "src: url(katex/dist/fonts/font.woff2);",
+ "css_url",
+ ),
+ ClassifyCase(
+ "U45",
+ "CSS url() quoted bare package path still works",
+ "katex",
+ "src/x.css",
+ 'src: url("katex/dist/fonts/font.woff2");',
+ "css_url",
+ ),
+]
+
+
+def run_classify_unit_tests() -> int:
+ passed = 0
+ for c in CLASSIFY_CASES:
+ actual = classify(c.pkg, c.file, c.content)
+ ok = actual == c.expected_kind
+ mark = "PASS" if ok else "FAIL"
+ print(f" [{mark}] {c.id}: {c.desc}")
+ if not ok:
+ print(f" pkg={c.pkg!r} file={c.file!r}")
+ print(f" content={c.content!r}")
+ print(f" expected={c.expected_kind!r}, actual={actual!r}")
+ if ok:
+ passed += 1
+ print()
+ print(f"{passed}/{len(CLASSIFY_CASES)} classify-unit cases pass")
+ return 0 if passed == len(CLASSIFY_CASES) else 1
+
+
+# ---------------------------------------------------------------------------
+# Adversarial end-to-end cases: drop a sneaky synthetic file into src/,
+# run the checker, then clean up. Catches the case where pattern detection
+# regresses for a real grep+classify pipeline (not just classify in isolation).
+# ---------------------------------------------------------------------------
+
+ADVERSARIAL_TMP_DIR = REPO / "studio/frontend/src/__dep_check_adversarial__"
+
+
+@dataclass
+class AdvCase:
+ id: str
+ desc: str
+ filename: str
+ content: str
+ target_pkg: str
+ expected_status: str
+ expected_failures: list[str]
+
+
+ADV_CASES: list[AdvCase] = [
+ AdvCase(
+ "A01",
+ "multi-line import of removed pkg should FAIL",
+ "adv01.ts",
+ 'import {\n foo,\n bar,\n} from "__adv_only_pkg_a__";\n',
+ "__adv_only_pkg_a__",
+ "FAIL",
+ ["__adv_only_pkg_a__"],
+ ),
+ AdvCase(
+ "A02",
+ "export * from removed pkg should FAIL",
+ "adv02.ts",
+ 'export * from "__adv_only_pkg_b__";\n',
+ "__adv_only_pkg_b__",
+ "FAIL",
+ ["__adv_only_pkg_b__"],
+ ),
+ AdvCase(
+ "A03",
+ "export { x } from removed pkg should FAIL",
+ "adv03.ts",
+ 'export { foo, bar } from "__adv_only_pkg_c__";\n',
+ "__adv_only_pkg_c__",
+ "FAIL",
+ ["__adv_only_pkg_c__"],
+ ),
+ AdvCase(
+ "A04",
+ "export type ... from removed pkg should FAIL",
+ "adv04.ts",
+ 'export type { Foo } from "__adv_only_pkg_d__";\n',
+ "__adv_only_pkg_d__",
+ "FAIL",
+ ["__adv_only_pkg_d__"],
+ ),
+ AdvCase(
+ "A05",
+ "package with similar prefix should NOT trigger FAIL",
+ "adv05.ts",
+ # The file imports __adv_only_pkg_e_extra__, but we will try
+ # to "remove" the shorter __adv_only_pkg_e__ name. The shorter
+ # name has zero real usage, so removal must be safe.
+ 'import x from "__adv_only_pkg_e_extra__";\n',
+ "__adv_only_pkg_e__",
+ "PASS",
+ [],
+ ),
+ AdvCase(
+ "A06",
+ "dynamic import of removed pkg should FAIL",
+ "adv06.ts",
+ 'const m = await import("__adv_only_pkg_f__");\n',
+ "__adv_only_pkg_f__",
+ "FAIL",
+ ["__adv_only_pkg_f__"],
+ ),
+ AdvCase(
+ "A07",
+ "new URL of removed pkg should FAIL",
+ "adv07.ts",
+ 'const w = new URL("__adv_only_pkg_g__/worker.js", import.meta.url);\n',
+ "__adv_only_pkg_g__",
+ "FAIL",
+ ["__adv_only_pkg_g__"],
+ ),
+ AdvCase(
+ "A08",
+ "string-concat dynamic import is unanalyzable (PASS)",
+ "adv08.ts",
+ 'const m = await import("__adv_only_" + "pkg_h__");\n',
+ "__adv_only_pkg_h__",
+ "PASS",
+ [],
+ ),
+ AdvCase(
+ "A09",
+ "package referenced only inside a JS comment "
+ "is conservatively flagged via the string_literal fallback "
+ "(this is acceptable -- err on the side of caution)",
+ "adv09.ts",
+ '// TODO: import x from "__adv_only_pkg_i__"\n',
+ "__adv_only_pkg_i__",
+ "FAIL",
+ ["__adv_only_pkg_i__"],
+ ),
+ AdvCase(
+ "A10",
+ "package referenced only in a Python file should " "NOT trigger a JS FAIL",
+ "adv10.py",
+ 'label = "__adv_only_pkg_j__"\n',
+ "__adv_only_pkg_j__",
+ "PASS",
+ [],
+ ),
+ AdvCase(
+ "A11",
+ "package mentioned in a markdown doc file is "
+ "ignored by JS-like-only string_literal",
+ "adv11.md",
+ "See [docs](https://example.com/__adv_only_pkg_k__).\n",
+ "__adv_only_pkg_k__",
+ "PASS",
+ [],
+ ),
+ AdvCase(
+ "A12",
+ "JSDoc @import of removed pkg should FAIL",
+ "adv12.ts",
+ '/** @type {import("__adv_only_pkg_l__").Foo} */\n' "const x = null;\n",
+ "__adv_only_pkg_l__",
+ "FAIL",
+ ["__adv_only_pkg_l__"],
+ ),
+ # Prettier formats a long named-import list one identifier per line.
+ # 22 imports + braces puts the `import` keyword ~22 lines away from
+ # the `from "pkg"` clause. Before the window widening, the classify
+ # multi-line fallback used ±4 lines, which silently missed every
+ # such block. This case fails with the old window and passes once
+ # the window is wide enough (currently ±25); the rendered import
+ # shape is sketched just after this list.
+ AdvCase(
+ "A13",
+ "Prettier-style 22-identifier multi-line import should FAIL "
+ "(exercises the widened multi-line classify window)",
+ "adv13.ts",
+ "import {\n"
+ + "".join(f" ident_{i:02d},\n" for i in range(22))
+ + '} from "__adv_only_pkg_m__";\n',
+ "__adv_only_pkg_m__",
+ "FAIL",
+ ["__adv_only_pkg_m__"],
+ ),
+]
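+
+# A13's generated import block renders like this (illustrative sketch;
+# the real block is built programmatically above):
+#
+#   import {
+#     ident_00,
+#     ident_01,
+#     ...
+#     ident_21,
+#   } from "__adv_only_pkg_m__";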
+
+
+# ---------------------------------------------------------------------------
+# package.json field-reference cases: simulate `prettier: "@x/config"`,
+# `eslintConfig.extends`, `overrides`, `peerDependenciesMeta`, etc.
+# These test the package_json_extra_refs() coverage. Cross-checked against
+# the patterns used by Tailwind, Stylelint, Prettier, Next.js, Astro,
+# TypeScript, ESLint, SvelteKit, Storybook, Vite, and TanStack/Query
+# manifests.
+# ---------------------------------------------------------------------------
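+# At the manifest level, a P01/P05-style head package.json contains
+# fragments like the following (package names are illustrative
+# placeholders, not entries from the real manifest):
+#
+#   "prettier": "@acme/prettier-config",
+#   "overrides": { "left-pad": "^1.3.0" }
+#
+# i.e. the package has left dependencies, but a tool-config field still
+# references it, which must count as usage.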
+
+
+@dataclass
+class PkgFieldCase:
+ id: str
+ desc: str
+ field_patch: dict # extra fields to merge into synth_head package.json
+ target_pkg: str
+ expected_status: str
+ expected_failures: list[str]
+
+
+PKG_FIELD_CASES: list[PkgFieldCase] = [
+ PkgFieldCase(
+ "P01",
+ "removing pkg referenced only in `prettier` string field",
+ {"prettier": "__pkg_prettier_config__"},
+ "__pkg_prettier_config__",
+ "FAIL",
+ ["__pkg_prettier_config__"],
+ ),
+ PkgFieldCase(
+ "P02",
+ "removing pkg referenced in `eslintConfig.extends` array",
+ {"eslintConfig": {"extends": ["__pkg_eslint_cfg__"]}},
+ "__pkg_eslint_cfg__",
+ "FAIL",
+ ["__pkg_eslint_cfg__"],
+ ),
+ PkgFieldCase(
+ "P03",
+ "removing pkg referenced in `stylelint.plugins`",
+ {"stylelint": {"plugins": ["__pkg_stylelint_plugin__"]}},
+ "__pkg_stylelint_plugin__",
+ "FAIL",
+ ["__pkg_stylelint_plugin__"],
+ ),
+ PkgFieldCase(
+ "P04",
+ "removing pkg referenced in `babel.presets`",
+ {"babel": {"presets": [["__pkg_babel_preset__", {"opt": 1}]]}},
+ "__pkg_babel_preset__",
+ "FAIL",
+ ["__pkg_babel_preset__"],
+ ),
+ PkgFieldCase(
+ "P05",
+ "removing pkg used as a key in `overrides`",
+ {"overrides": {"__pkg_overridden__": "^1.0.0"}},
+ "__pkg_overridden__",
+ "FAIL",
+ ["__pkg_overridden__"],
+ ),
+ PkgFieldCase(
+ "P06",
+ "removing pkg used as a key in `pnpm.overrides`",
+ {"pnpm": {"overrides": {"__pkg_pnpm_override__": "^1.0.0"}}},
+ "__pkg_pnpm_override__",
+ "FAIL",
+ ["__pkg_pnpm_override__"],
+ ),
+ PkgFieldCase(
+ "P07",
+ "removing pkg used as a key in `pnpm.patchedDependencies`",
+ {"pnpm": {"patchedDependencies": {"__pkg_patched__": "patches/x.patch"}}},
+ "__pkg_patched__",
+ "FAIL",
+ ["__pkg_patched__"],
+ ),
+ PkgFieldCase(
+ "P08",
+ "removing pkg used as a key in `peerDependenciesMeta`",
+ {"peerDependenciesMeta": {"__pkg_peer_meta__": {"optional": True}}},
+ "__pkg_peer_meta__",
+ "FAIL",
+ ["__pkg_peer_meta__"],
+ ),
+ PkgFieldCase(
+ "P09",
+ "removing pkg referenced in `jest.preset` string",
+ {"jest": {"preset": "__pkg_jest_preset__"}},
+ "__pkg_jest_preset__",
+ "FAIL",
+ ["__pkg_jest_preset__"],
+ ),
+ PkgFieldCase(
+ "P10",
+ "removing pkg referenced in `commitlint.extends`",
+ {"commitlint": {"extends": ["__pkg_commitlint__"]}},
+ "__pkg_commitlint__",
+ "FAIL",
+ ["__pkg_commitlint__"],
+ ),
+ PkgFieldCase(
+ "P11",
+ "removing pkg referenced in `renovate.extends`",
+ {"renovate": {"extends": ["__pkg_renovate__"]}},
+ "__pkg_renovate__",
+ "FAIL",
+ ["__pkg_renovate__"],
+ ),
+ PkgFieldCase(
+ "P12",
+ "removing pkg referenced in `remarkConfig.plugins`",
+ {"remarkConfig": {"plugins": ["__pkg_remark__"]}},
+ "__pkg_remark__",
+ "FAIL",
+ ["__pkg_remark__"],
+ ),
+ PkgFieldCase(
+ "P13",
+ "removing pkg with subpath ref in tool config (`pkg/config`)",
+ {"prettier": "__pkg_prettier_sub__/config"},
+ "__pkg_prettier_sub__",
+ "FAIL",
+ ["__pkg_prettier_sub__"],
+ ),
+ PkgFieldCase(
+ "P14",
+ "false-positive guard: similar-prefix package in tool config",
+ {"prettier": "__pkg_short_extra__/config"},
+ "__pkg_short__",
+ "PASS",
+ [],
+ ),
+ PkgFieldCase(
+ "P15",
+ "false-positive guard: package-named string in `browserslist` "
+ "must NOT trigger (browserslist values are browser queries, "
+ "never package names)",
+ {"browserslist": ["last 2 versions", "__pkg_browserslist__"]},
+ "__pkg_browserslist__",
+ "PASS",
+ [],
+ ),
+ PkgFieldCase(
+ "P16",
+ "false-positive guard: matching string in `keywords` field",
+ {"keywords": ["__pkg_keyword__", "foo"]},
+ "__pkg_keyword__",
+ "PASS",
+ [],
+ ),
+ PkgFieldCase(
+ "P17",
+ "false-positive guard: matching string in `workspaces` (paths)",
+ {"workspaces": ["packages/__pkg_workspace_path__"]},
+ "__pkg_workspace_path__",
+ "PASS",
+ [],
+ ),
+ PkgFieldCase(
+ "P18",
+ "false-positive guard: matching value in `files` field",
+ {"files": ["dist/__pkg_in_files__"]},
+ "__pkg_in_files__",
+ "PASS",
+ [],
+ ),
+ PkgFieldCase(
+ "P19",
+ "false-positive guard: matching `packageManager` string",
+ {"packageManager": "__pkg_in_pm__@1.0.0"},
+ "__pkg_in_pm__",
+ "PASS",
+ [],
+ ),
+]
+
+
+def run_pkg_field_cases() -> int:
+ head_pkg = json.loads(HEAD_PKG.read_text())
+ passed = 0
+ for pc in PKG_FIELD_CASES:
+ synth_head = json.loads(json.dumps(head_pkg))
+ # Apply the field patch (deep-merge isn't needed; we control the keys).
+ for k, v in pc.field_patch.items():
+ synth_head[k] = v
+ # Base has the target in dependencies; head does not. The extra field
+ # in synth_head references the target pkg even though it's no longer
+ # in deps.
+ synth_base = json.loads(json.dumps(head_pkg))
+ synth_base.setdefault("dependencies", {})[pc.target_pkg] = "^1.0.0"
+ with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+ json.dump(synth_base, f, indent = 2)
+ base_path = f.name
+ with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+ json.dump(synth_head, f, indent = 2)
+ head_path = f.name
+ try:
+ proc = subprocess.run(
+ [
+ sys.executable,
+ str(SCRIPT),
+ "--base-pkg",
+ base_path,
+ "--head-pkg",
+ head_path,
+ "--head-lock",
+ str(HEAD_LOCK),
+ ],
+ capture_output = True,
+ text = True,
+ cwd = str(REPO),
+ )
+ finally:
+ os.unlink(base_path)
+ os.unlink(head_path)
+ actual_status = {0: "PASS", 1: "FAIL"}.get(
+ proc.returncode, f"RC{proc.returncode}"
+ )
+ fails: list[str] = []
+ in_summary = False
+ for line in proc.stdout.splitlines():
+ if "FAIL:" in line and "removed package" in line:
+ in_summary = True
+ continue
+ if in_summary and line.strip().startswith("- "):
+ fails.append(line.strip()[2:])
+ # Require BOTH the exit status and the exact failure set to match,
+ # so false-positive guards (e.g. P15) also assert an empty failure
+ # list.
+ ok = actual_status == pc.expected_status and set(fails) == set(
+ pc.expected_failures
+ )
+ mark = "PASS" if ok else "FAIL"
+ print(f" [{mark}] {pc.id}: {pc.desc}")
+ if not ok:
+ print(
+ f" expected: status={pc.expected_status} fails={pc.expected_failures}"
+ )
+ print(f" actual: status={actual_status} fails={fails}")
+ for ln in proc.stdout.splitlines()[:25]:
+ print(f" {ln}")
+ if ok:
+ passed += 1
+ print()
+ print(f"{passed}/{len(PKG_FIELD_CASES)} package.json-field cases pass")
+ return 0 if passed == len(PKG_FIELD_CASES) else 1
+
+
+def run_adversarial_cases() -> int:
+ ADVERSARIAL_TMP_DIR.mkdir(parents = True, exist_ok = True)
+ head_pkg = json.loads(HEAD_PKG.read_text())
+ passed = 0
+ for ac in ADV_CASES:
+ # Drop the synthetic file.
+ fpath = ADVERSARIAL_TMP_DIR / ac.filename
+ try:
+ fpath.write_text(ac.content)
+ # Build a synthetic base that has the target pkg added; head
+ # is the real head (without it). The script sees the pkg as
+ # removed and scans the repo, which now includes our file.
+ synth_base = json.loads(json.dumps(head_pkg))
+ synth_base.setdefault("dependencies", {})[ac.target_pkg] = "^1.0.0"
+ with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+ json.dump(synth_base, f, indent = 2)
+ base_path = f.name
+ try:
+ proc = subprocess.run(
+ [
+ sys.executable,
+ str(SCRIPT),
+ "--base-pkg",
+ base_path,
+ "--head-pkg",
+ str(HEAD_PKG),
+ "--head-lock",
+ str(HEAD_LOCK),
+ ],
+ capture_output = True,
+ text = True,
+ cwd = str(REPO),
+ )
+ finally:
+ os.unlink(base_path)
+ actual_status = {0: "PASS", 1: "FAIL"}.get(
+ proc.returncode, f"RC{proc.returncode}"
+ )
+ fails = []
+ in_summary = False
+ for line in proc.stdout.splitlines():
+ if "FAIL:" in line and "removed package" in line:
+ in_summary = True
+ continue
+ if in_summary and line.strip().startswith("- "):
+ fails.append(line.strip()[2:])
+ ok = actual_status == ac.expected_status and set(fails) == set(
+ ac.expected_failures
+ )
+ mark = "PASS" if ok else "FAIL"
+ print(f" [{mark}] {ac.id}: {ac.desc}")
+ if not ok:
+ print(
+ f" expected: status={ac.expected_status} fails={ac.expected_failures}"
+ )
+ print(f" actual: status={actual_status} fails={fails}")
+ for ln in proc.stdout.splitlines()[:20]:
+ print(f" {ln}")
+ if ok:
+ passed += 1
+ finally:
+ try:
+ fpath.unlink()
+ except FileNotFoundError:
+ pass
+ # Clean up the directory.
+ try:
+ ADVERSARIAL_TMP_DIR.rmdir()
+ except OSError:
+ pass
+ print()
+ print(f"{passed}/{len(ADV_CASES)} adversarial cases pass")
+ return 0 if passed == len(ADV_CASES) else 1
+
+
+# ---------------------------------------------------------------------------
+# Dead-dep enumeration cases.
+# ---------------------------------------------------------------------------
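+# The harness below parses the --enumerate-dead report by section
+# header. Assumed shape, inferred from the parser in run_enum_cases
+# (counts and names are illustrative):
+#
+#   unused (2):
+#     - left-pad
+#     - pad-left
+#   used: <n>
+#   type_pkg_orphan (1):
+#     - @types/left-pad
+#   type_pkg_kept: <n>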
+
+
+@dataclass
+class EnumCase:
+ id: str
+ desc: str
+ add_deps: dict[str, str]
+ add_dev_deps: dict[str, str]
+ field_patch: dict
+ extra_file: tuple[str, str] | None # (relative_path, content) or None
+ expected_unused: set[str]
+ expected_used: set[str]
+ expected_orphan_types: set[str]
+
+
+ENUM_CASES: list[EnumCase] = [
+ EnumCase(
+ "E01",
+ "fake dep with no usage anywhere is flagged unused",
+ {"__enum_fake_unused_pkg__": "^1.0.0"},
+ {},
+ {},
+ None,
+ {"__enum_fake_unused_pkg__"},
+ set(),
+ set(),
+ ),
+ EnumCase(
+ "E02",
+ "fake dep referenced via vite.config-style import is flagged used "
+ "(uses a real adversarial file as the import site)",
+ {"__enum_used_via_src__": "^1.0.0"},
+ {},
+ {},
+ (
+ "src/__dep_check_adversarial__/enum_e02.ts",
+ 'import x from "__enum_used_via_src__";\n',
+ ),
+ set(),
+ {"__enum_used_via_src__"},
+ set(),
+ ),
+ EnumCase(
+ "E03",
+ "fake dep referenced only in package.json `overrides` is flagged used",
+ {"__enum_used_via_overrides__": "^1.0.0"},
+ {},
+ {"overrides": {"__enum_used_via_overrides__": "^1.0.0"}},
+ None,
+ set(),
+ {"__enum_used_via_overrides__"},
+ set(),
+ ),
+ EnumCase(
+ "E04",
+ "@types/X where X is declared -> kept (NOT orphan)",
+ {"__enum_real_pkg__": "^1.0.0"},
+ {"@types/__enum_real_pkg__": "^1.0.0"},
+ {},
+ (
+ "src/__dep_check_adversarial__/enum_e04.ts",
+ 'import x from "__enum_real_pkg__";\n',
+ ),
+ set(),
+ {"__enum_real_pkg__"},
+ set(),
+ ),
+ EnumCase(
+ "E05",
+ "@types/X where X is NOT declared anywhere -> orphan",
+ {},
+ {"@types/__enum_orphan_pkg__": "^1.0.0"},
+ {},
+ None,
+ set(),
+ set(),
+ {"@types/__enum_orphan_pkg__"},
+ ),
+]
+
+
+def run_enum_cases() -> int:
+ head_pkg = json.loads(HEAD_PKG.read_text())
+ passed = 0
+ ADVERSARIAL_TMP_DIR.mkdir(parents = True, exist_ok = True)
+ for ec in ENUM_CASES:
+ synth_head = json.loads(json.dumps(head_pkg))
+ synth_head.setdefault("dependencies", {}).update(ec.add_deps)
+ synth_head.setdefault("devDependencies", {}).update(ec.add_dev_deps)
+ for k, v in ec.field_patch.items():
+ synth_head[k] = v
+ # Drop any temp source file if needed.
+ fpath = None
+ if ec.extra_file:
+ rel, content = ec.extra_file
+ fpath = REPO / rel
+ fpath.parent.mkdir(parents = True, exist_ok = True)
+ fpath.write_text(content)
+ with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+ json.dump(synth_head, f, indent = 2)
+ head_path = f.name
+ try:
+ proc = subprocess.run(
+ [
+ sys.executable,
+ str(SCRIPT),
+ "--base-pkg",
+ str(HEAD_PKG),
+ "--head-pkg",
+ head_path,
+ "--head-lock",
+ str(HEAD_LOCK),
+ "--enumerate-dead",
+ ],
+ capture_output = True,
+ text = True,
+ cwd = str(REPO),
+ )
+ finally:
+ os.unlink(head_path)
+ if fpath:
+ try:
+ fpath.unlink()
+ except FileNotFoundError:
+ pass
+ # Parse the dead-dep enumeration output.
+ unused: set[str] = set()
+ orphans: set[str] = set()
+ in_unused = False
+ in_orphan = False
+ for line in proc.stdout.splitlines():
+ s = line.strip()
+ if s.startswith("unused ("):
+ in_unused = True
+ in_orphan = False
+ continue
+ if s.startswith("type_pkg_orphan ("):
+ in_unused = False
+ in_orphan = True
+ continue
+ if s.startswith("used:") or s.startswith("type_pkg_kept:"):
+ in_unused = in_orphan = False
+ continue
+ if s.startswith("- "):
+ if in_unused:
+ unused.add(s[2:])
+ elif in_orphan:
+ orphans.add(s[2:])
+ unused_ok = ec.expected_unused.issubset(unused) and (
+ not ec.expected_used or not (ec.expected_used & unused)
+ )
+ orphan_ok = ec.expected_orphan_types.issubset(orphans)
+ ok = unused_ok and orphan_ok
+ mark = "PASS" if ok else "FAIL"
+ print(f" [{mark}] {ec.id}: {ec.desc}")
+ if not ok:
+ print(f" expected unused superset: {sorted(ec.expected_unused)}")
+ print(f" expected used NOT in unused: {sorted(ec.expected_used)}")
+ print(
+ f" expected orphans superset: {sorted(ec.expected_orphan_types)}"
+ )
+ print(f" actual unused: {sorted(unused)}")
+ print(f" actual orphans: {sorted(orphans)}")
+ for ln in proc.stdout.splitlines()[:30]:
+ print(f" {ln}")
+ if ok:
+ passed += 1
+ # Cleanup tmp dir if empty.
+ try:
+ ADVERSARIAL_TMP_DIR.rmdir()
+ except OSError:
+ pass
+ print()
+ print(f"{passed}/{len(ENUM_CASES)} enumeration cases pass")
+ return 0 if passed == len(ENUM_CASES) else 1
+
+
+# ---------------------------------------------------------------------------
+# Script-wrapper cases: exercise scripts_bin_refs / _next_real_bin so a
+# package.json script like `cross-env CI=1 biome check` correctly credits
+# `@biomejs/biome` rather than the wrapper itself. The 10x reviewer flagged
+# the original "first non-env token" heuristic as too narrow: any project
+# using cross-env / dotenv / dotenvx / env-cmd, or a quoted env value,
+# would bypass the bin-name check.
+# ---------------------------------------------------------------------------
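+# Assumed surface of the two helpers under test (both live in the
+# checker script; signatures are inferred from the call sites below,
+# not restated from the implementation):
+#
+#   _next_real_bin(words: list[str], start: int) -> str | None
+#       peels runner prefixes (npx, pnpm exec), env-wrapper bins
+#       (cross-env, dotenv, ...), VAR=value tokens, and a `--`
+#       separator, then returns the first real bin name, e.g.
+#       _next_real_bin(["cross-env", "CI=1", "biome", "check"], 0)
+#       == "biome"
+#
+#   scripts_bin_refs(pkg_json: dict, bin_to_pkg: dict[str, str]) -> dict
+#       keyed by the owning package of each script's real bin, so
+#       {"lint": "cross-env CI=1 biome check"} with
+#       {"biome": "@biomejs/biome"} credits "@biomejs/biome".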
+
+
+@dataclass
+class WrapperCase:
+ id: str
+ desc: str
+ raw_cmd: str
+ expected_bin: str | None # None means "no real bin (e.g. unwrappable)"
+
+
+WRAPPER_CASES: list[WrapperCase] = [
+ WrapperCase(
+ "W01",
+ "cross-env wraps the real bin",
+ "cross-env CI=1 biome check .",
+ "biome",
+ ),
+ WrapperCase(
+ "W02",
+ "cross-env with multiple env tokens after the wrapper",
+ "cross-env A=1 B=2 NODE_ENV=prod biome check",
+ "biome",
+ ),
+ WrapperCase(
+ "W03",
+ "bare env-prefix run (no wrapper) still peels the env tokens",
+ "FOO=bar biome check",
+ "biome",
+ ),
+ WrapperCase(
+ "W04",
+ "quoted env value with spaces (shlex preserves it as one word)",
+ 'FOO="a b" biome check',
+ "biome",
+ ),
+ WrapperCase(
+ "W05",
+ "npx + cross-env: runner peels, wrapper peels, real bin wins",
+ "npx cross-env CI=1 biome check",
+ "biome",
+ ),
+ WrapperCase(
+ "W06",
+ "pnpm exec + cross-env",
+ "pnpm exec cross-env CI=1 biome check",
+ "biome",
+ ),
+ WrapperCase(
+ "W07",
+ "dotenv with the `--` separator before the wrapped command",
+ "dotenv -- biome check",
+ "biome",
+ ),
+ WrapperCase(
+ "W08",
+ "dotenv with a flag-arg pair and `--` separator",
+ "dotenv -e .env -- biome check",
+ "biome",
+ ),
+ WrapperCase(
+ "W09",
+ "leading `./node_modules/.bin/` prefix is stripped",
+ "./node_modules/.bin/biome check",
+ "biome",
+ ),
+ WrapperCase(
+ "W10",
+ "concurrently is NOT a script wrapper -- it dispatches by "
+ "script *name*, not bin, so the real bin is `concurrently` "
+ "itself (the wrapped script names are credited by their own "
+ "scripts entries, which scripts_bin_refs iterates separately)",
+ 'concurrently "npm:dev" "npm:typecheck"',
+ "concurrently",
+ ),
+]
+
+
+def run_wrapper_cases() -> int:
+ import shlex
+
+ passed = 0
+ for wc in WRAPPER_CASES:
+ try:
+ words = shlex.split(wc.raw_cmd, posix = True)
+ except ValueError:
+ words = wc.raw_cmd.split()
+ actual = _next_real_bin(words, 0)
+ ok = actual == wc.expected_bin
+ mark = "PASS" if ok else "FAIL"
+ print(f" [{mark}] {wc.id}: {wc.desc}")
+ if not ok:
+ print(f" raw_cmd={wc.raw_cmd!r}")
+ print(f" expected={wc.expected_bin!r}, actual={actual!r}")
+ if ok:
+ passed += 1
+
+ # End-to-end integration: feed scripts_bin_refs a synthetic head_pkg
+ # whose scripts use a wrapper, and confirm the package owning the
+ # wrapped bin is credited (rather than the wrapper). This is the
+ # actual call path used by find_command_usage().
+ int_total = 0
+ int_passed = 0
+ int_cases = [
+ (
+ "I01",
+ "cross-env wrapping `biome` credits @biomejs/biome",
+ {"lint": "cross-env CI=1 biome check"},
+ {"biome": "@biomejs/biome"},
+ "@biomejs/biome",
+ ),
+ (
+ "I02",
+ "dotenv -- biome credits @biomejs/biome",
+ {"lint": "dotenv -- biome check"},
+ {"biome": "@biomejs/biome"},
+ "@biomejs/biome",
+ ),
+ (
+ "I03",
+ "quoted env value before bin still credits the bin's owner",
+ {"lint": 'FOO="a b" biome check .'},
+ {"biome": "@biomejs/biome"},
+ "@biomejs/biome",
+ ),
+ (
+ "I04",
+ "&& chain: both halves credit their owning packages",
+ {"build": "tsc -b && cross-env CI=1 biome check"},
+ {"tsc": "typescript", "biome": "@biomejs/biome"},
+ None, # checked via the owners set below
+ ),
+ ]
+ for case_id, desc, scripts, bin_to_pkg, expect_owner in int_cases:
+ int_total += 1
+ refs = scripts_bin_refs({"scripts": scripts}, bin_to_pkg)
+ if case_id == "I04":
+ owners = set(refs.keys())
+ ok = owners == {"typescript", "@biomejs/biome"}
+ else:
+ ok = expect_owner in refs
+ mark = "PASS" if ok else "FAIL"
+ print(f" [{mark}] {case_id}: {desc}")
+ if not ok:
+ print(f" scripts={scripts!r} bin_to_pkg={bin_to_pkg!r}")
+ print(f" refs={refs!r}")
+ if ok:
+ int_passed += 1
+
+ total = len(WRAPPER_CASES) + int_total
+ print()
+ print(f"{passed + int_passed}/{total} wrapper-script cases pass")
+ return 0 if (passed == len(WRAPPER_CASES) and int_passed == int_total) else 1
+
+
+def main() -> int:
+ head_pkg = json.loads(HEAD_PKG.read_text())
+ print(f"Running {len(CASES)} edge cases against {SCRIPT.relative_to(REPO)}")
+ print()
+ results: list[tuple[Case, bool, str]] = []
+ for c in CASES:
+ ok, detail = run_case(c, head_pkg)
+ results.append((c, ok, detail))
+ mark = "PASS" if ok else "FAIL"
+ print(f" [{mark}] {c.id}: {c.desc}")
+ if not ok:
+ for line in detail.splitlines():
+ print(f" {line}")
+ print()
+ passed = sum(1 for _, ok, _ in results if ok)
+ total = len(results)
+ print(f"{passed}/{total} edge cases pass")
+
+ print()
+ print(f"Running {len(CLASSIFY_CASES)} classify() unit cases")
+ print()
+ cls_rc = run_classify_unit_tests()
+
+ print()
+ print(f"Running {len(ADV_CASES)} adversarial end-to-end cases")
+ print()
+ adv_rc = run_adversarial_cases()
+
+ print()
+ print(f"Running {len(PKG_FIELD_CASES)} package.json-field cases")
+ print()
+ pkg_rc = run_pkg_field_cases()
+
+ print()
+ print(f"Running {len(ENUM_CASES)} dead-dep enumeration cases")
+ print()
+ enum_rc = run_enum_cases()
+
+ print()
+ print(
+ f"Running {len(WRAPPER_CASES)} script-wrapper cases "
+ "(_next_real_bin + scripts_bin_refs end-to-end)"
+ )
+ print()
+ wrap_rc = run_wrapper_cases()
+
+ if (
+ passed == total
+ and cls_rc == 0
+ and adv_rc == 0
+ and pkg_rc == 0
+ and enum_rc == 0
+ and wrap_rc == 0
+ ):
+ return 0
+ return 1
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())