diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml
index 3632125ca..1270a57ef 100644
--- a/.github/workflows/studio-frontend-ci.yml
+++ b/.github/workflows/studio-frontend-ci.yml
@@ -15,6 +15,8 @@ on:
   pull_request:
     paths:
       - 'studio/frontend/**'
+      - 'scripts/check_frontend_dep_removal.py'
+      - 'tests/studio/test_frontend_dep_removal.py'
       - '.github/workflows/studio-frontend-ci.yml'
   push:
     branches: [main, pip]
@@ -84,6 +86,26 @@ jobs:
           exit 1
         fi
 
+      # Catch the common foot-gun: a dep dropped from package.json that is
+      # still imported somewhere. The script walks the lockfile dep graph
+      # from the new top-level deps and only counts top-level node_modules
+      # paths as valid resolution targets for bare src/ imports.
+      #
+      # actions/checkout uses fetch-depth: 1 by default, so the base branch
+      # is not available locally. Fetch the single base commit with an
+      # explicit refspec so origin/<branch> is reliably created (a bare
+      # `git fetch origin <branch>` only updates FETCH_HEAD in some configs).
+      - name: Dependency removal safety check
+        if: github.event_name == 'pull_request'
+        working-directory: ${{ github.workspace }}
+        run: |
+          git fetch --no-tags --depth=1 origin \
+            "${{ github.base_ref }}:refs/remotes/origin/${{ github.base_ref }}"
+          python3 scripts/check_frontend_dep_removal.py \
+            --base "origin/${{ github.base_ref }}" \
+            --enumerate-dead
+          python3 tests/studio/test_frontend_dep_removal.py
+
       - name: Typecheck
         run: npm run typecheck

diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py
new file mode 100644
index 000000000..260ad5215
--- /dev/null
+++ b/scripts/check_frontend_dep_removal.py
@@ -0,0 +1,1195 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+"""Guard against breaking npm dependency removals in studio/frontend.
+
+Diffs the current package.json against a git base, finds every package
+that was removed, and confirms each is no longer referenced anywhere
+in the repo. If a removed package is still imported and is not
+transitively resolvable through the new lockfile, exits non-zero with
+file:line citations.
+
+Usage:
+    python scripts/check_frontend_dep_removal.py
+    python scripts/check_frontend_dep_removal.py --base origin/main
+    python scripts/check_frontend_dep_removal.py --base HEAD~1
+    python scripts/check_frontend_dep_removal.py --base-pkg PATH --head-lock PATH
+
+Exit codes:
+    0  every removed dep is safe (no source refs or still resolvable)
+    1  at least one removed dep is referenced and not resolvable
+    2  invocation error (bad args, missing file, git error)
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+FRONTEND_PKG = "studio/frontend/package.json"
+FRONTEND_LOCK = "studio/frontend/package-lock.json"
+
+DEP_FIELDS = (
+    "dependencies",
+    "devDependencies",
+    "peerDependencies",
+    "optionalDependencies",
+)
+
+# Sources where seeing a package name does NOT count as usage.
+EXPECTED_NOISE_FILES = {
+    "studio/frontend/package.json",
+    "studio/frontend/package-lock.json",
+    "studio/backend/core/data_recipe/oxc-validator/package.json",
+    "studio/backend/core/data_recipe/oxc-validator/package-lock.json",
+}
+
+# Only quoted-string occurrences in these file types can be module specifiers.
+JS_LIKE_EXT = re.compile(
+    r"\.(ts|tsx|js|jsx|mjs|cjs|html|htm|css|scss|sass|json|jsonc)$"
+)
+# Files where JS-syntactic import patterns (static/dynamic/require/re-export)
+# could be a real module reference. Markdown gets a separate gate (.mdx is
+# real ESM; .md code fences are not).
+SCRIPT_LIKE_EXT = re.compile(r"\.(ts|tsx|js|jsx|mjs|cjs|mdx)$")
+STYLE_EXT = re.compile(r"\.(css|scss|sass)$")
+HTML_EXT = re.compile(r"\.(html|htm)$")
+TS_LIKE_EXT = re.compile(r"\.(ts|tsx|mts|cts|mdx)$")
+# Files where a removed package's CLI binary could be invoked (npx, bunx,
+# yarn dlx, pnpm exec, or a bare `pkg --flag` shell call).
+COMMAND_LIKE_EXT = re.compile(r"(\.(ya?ml|sh|ps1|bat)$|(^|/)Dockerfile[^/]*$)")
+
+GREP_INCLUDES = [
+    "--include=*.ts",
+    "--include=*.tsx",
+    "--include=*.js",
+    "--include=*.jsx",
+    "--include=*.mjs",
+    "--include=*.cjs",
+    "--include=*.html",
+    "--include=*.htm",
+    "--include=*.css",
+    "--include=*.scss",
+    "--include=*.sass",
+    "--include=*.json",
+    "--include=*.jsonc",
+    "--include=*.md",
+    "--include=*.mdx",
+    "--include=*.py",
+    "--include=*.rs",
+    "--include=*.toml",
+    "--include=*.yml",
+    "--include=*.yaml",
+    "--include=*.sh",
+    "--include=*.ps1",
+    "--include=*.bat",
+    "--include=Dockerfile*",
+]
+GREP_EXCLUDES = [
+    "--exclude-dir=node_modules",
+    "--exclude-dir=dist",
+    "--exclude-dir=.git",
+    "--exclude-dir=__pycache__",
+    "--exclude-dir=target",
+    "--exclude-dir=.next",
+    "--exclude-dir=build",
+    "--exclude-dir=.venv",
+    "--exclude-dir=venv",
+]
+
+# A pip-installed playwright reference is the PyPI package, not npm.
+PIP_PLAYWRIGHT = re.compile(
+    r"(pip\s+install\s+['\"]?playwright"
+    r"|python\s+-m\s+playwright"
+    r"|from\s+playwright"
+    r"|^\s*import\s+playwright)"
+)
+
+
+@dataclass
+class Hit:
+    file: str
+    line: int
+    kind: str
+    snippet: str
+
+
+def run(cmd: list[str], cwd: Path | None = None) -> str:
+    """Run a command, return stdout. On non-zero exit, return ''."""
+    res = subprocess.run(
+        cmd,
+        cwd = cwd or REPO_ROOT,
+        stdout = subprocess.PIPE,
+        stderr = subprocess.PIPE,
+        text = True,
+    )
+    return res.stdout if res.returncode == 0 else ""
+
+
+def read_pkg_at(base: str, path: str) -> dict:
+    """Read JSON at `base:path` via git show. Empty dict if missing."""
+    out = run(["git", "show", f"{base}:{path}"])
+    if not out.strip():
+        return {}
+    return json.loads(out)
+
+
+def read_pkg_file(path: Path) -> dict:
+    if not path.exists():
+        return {}
+    return json.loads(path.read_text(encoding = "utf-8"))
+
+
+def all_decl_names(pkg: dict) -> set[str]:
+    names: set[str] = set()
+    for field in DEP_FIELDS:
+        names.update((pkg.get(field) or {}).keys())
+    return names
+
+
+def _resolve_install_path(parent_path: str, name: str, pkgs: dict) -> str | None:
+    """Walk up the nested node_modules chain from `parent_path` to find
+    where `name` actually resolves. Mirrors Node module resolution.
+    """
+    parts = parent_path.split("/node_modules/")
+    for i in range(len(parts), 0, -1):
+        prefix = "/node_modules/".join(parts[:i])
+        trial = (prefix + "/node_modules/" if prefix else "node_modules/") + name
+        if trial in pkgs:
+            return trial
+    if f"node_modules/{name}" in pkgs:
+        return f"node_modules/{name}"
+    return None
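+
+
+# Worked example (hypothetical lockfile paths, for illustration only): given
+#   pkgs = {"node_modules/a": {...},
+#           "node_modules/a/node_modules/b": {...},
+#           "node_modules/b": {...}}
+# _resolve_install_path("node_modules/a", "b", pkgs) returns the nested
+# "node_modules/a/node_modules/b" (the copy Node loads from inside `a`),
+# while _resolve_install_path("", "b", pkgs) returns the top-level
+# "node_modules/b".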
+
+
+def _deps_of(meta: dict) -> dict:
+    """Deps npm actually installs. Optional peers are skipped: npm only
+    installs them when another package declares the same dep, so for the
+    purpose of "is this package still reachable" they cannot keep a
+    removed top-level dep alive on their own.
+    """
+    out = {}
+    for field in ("dependencies", "optionalDependencies"):
+        out.update(meta.get(field) or {})
+    peer_meta = meta.get("peerDependenciesMeta") or {}
+    for name, spec in (meta.get("peerDependencies") or {}).items():
+        if (peer_meta.get(name) or {}).get("optional"):
+            continue
+        out[name] = spec
+    return out
+
+
+def reachable_from_head(head_pkg: dict, lock: dict) -> set[str]:
+    """Walk the lockfile dep graph starting from `head_pkg`'s top-level
+    declared deps. Returns the set of lockfile install paths that survive.
+    Stale lockfile entries (orphaned by the new package.json) are excluded.
+    """
+    pkgs = lock.get("packages", {})
+    if not pkgs:
+        return set()
+    roots = all_decl_names(head_pkg)
+    seen: set[str] = set()
+    frontier: list[str] = []
+    for name in roots:
+        p = _resolve_install_path("", name, pkgs)
+        if p:
+            frontier.append(p)
+    while frontier:
+        path = frontier.pop()
+        if path in seen:
+            continue
+        seen.add(path)
+        meta = pkgs.get(path, {})
+        for dep_name in _deps_of(meta):
+            p = _resolve_install_path(path, dep_name, pkgs)
+            if p and p not in seen:
+                frontier.append(p)
+    return seen
+
+
+def classify(pkg: str, file: str, content: str) -> str | None:
+    """Return why `content` references `pkg`, or None.
+
+    `content` may span multiple lines (for multi-line imports/exports);
+    each pattern uses re.DOTALL where it matters. The bare-spec
+    regexes use a word-boundary check on the package name so that
+    searching for `foo` does not match `foobar`.
+
+    File-type gating: JS-syntactic patterns only fire on .ts/.tsx/.js/.jsx/
+    .mjs/.cjs/.mdx files, so an `import x from "pkg"` snippet inside a
+    Python test fixture or a Markdown code block is not mistaken for a
+    real npm usage. CSS patterns only fire on .css/.scss/.sass. HTML
+    patterns only fire on .html/.htm.
+    """
+    if file in EXPECTED_NOISE_FILES:
+        return None
+
+    esc = re.escape(pkg)
+    # Subpath gate: after the package name, the next char must be either
+    # the closing quote, `/`, or end-of-string. Prevents foo matching foobar.
+    sub = r"(?:/[^'\"`]*)?"
+
+    flags_dotall = re.DOTALL | re.MULTILINE
+
+    is_script = bool(SCRIPT_LIKE_EXT.search(file))
+    is_style = bool(STYLE_EXT.search(file))
+    is_html = bool(HTML_EXT.search(file))
+    is_ts = bool(TS_LIKE_EXT.search(file))
+
+    # If the file is none of script / style / html / json (which is the
+    # quoted-string fallback surface) and is not an mdx file, no classify
+    # rule applies. This is what gates out Python fixtures, Markdown code
+    # blocks, shell snippets, etc.
+    is_json = file.endswith(".json") or file.endswith(".jsonc")
+    if not (is_script or is_style or is_html or is_json):
+        return None
+
+    # CSS @import is checked first so it does not collide with the
+    # side-effect-import regex below.
+    if is_style and re.search(rf"@import\s+['\"]{esc}{sub}['\"]", content):
+        return "css_import"
+    # CSS url(): quoted or unquoted bare package paths
+    # (`url(katex/dist/fonts/x.woff2)`).
+    if is_style and re.search(
+        rf"url\(\s*['\"]?{esc}(?:/[^'\")]*)?['\"]?\s*\)", content
+    ):
+        return "css_url"
+    # Static imports: handle multi-line `import { ... } from "pkg"` by
+    # allowing arbitrary content (newlines included) between `import`
+    # and `from`. The non-greedy match plus the required `from` keeps
+    # this scoped to a single statement.
+    if is_script and re.search(
+        rf"(?<![\w.])import\b(?:(?!\bfrom\b|['\"`;]).)*?\bfrom\s+['\"]{esc}{sub}['\"]",
+        content,
+        flags_dotall,
+    ):
+        return "static_import"
+    # Re-exports: `export * from "pkg"`, `export { x } from "pkg"`,
+    # `export type { X } from "pkg"` -- multi-line lists included.
+    if is_script and re.search(
+        rf"(?<![\w.])export\b(?:(?!\bfrom\b|['\"`;]).)*?\bfrom\s+['\"]{esc}{sub}['\"]",
+        content,
+        flags_dotall,
+    ):
+        return "re_export"
+    # Side-effect import: `import "pkg"` / `import "pkg/styles.css"`.
+    if is_script and re.search(rf"(?<![\w.])import\s+['\"]{esc}{sub}['\"]", content):
+        return "side_effect_import"
+    # Dynamic import -- also covers JSDoc `{import("pkg").T}` and the
+    # TS type position `import("pkg").ComponentType`.
+    if is_script and re.search(rf"(?<![\w.])import\(\s*['\"]{esc}{sub}['\"]", content):
+        return "dynamic_import"
+    # CommonJS require / require.resolve.
+    if is_script and re.search(
+        rf"\brequire(?:\.resolve)?\(\s*['\"]{esc}{sub}['\"]", content
+    ):
+        return "require"
+    # Worker/asset URLs: `new URL("pkg/worker.js", import.meta.url)`.
+    if is_script and re.search(rf"new\s+URL\(\s*['\"]{esc}{sub}['\"]", content):
+        return "new_url"
+    # Template-literal specifier (`pkg` in backticks).
+    if is_script and re.search(rf"`{esc}{sub}`", content):
+        return "template_literal"
+    # HTML src/href: the package name must appear as a full path segment
+    # followed by `/`, `@` (version pin), or the closing quote, so `foo`
+    # does not match `/node_modules/foo-extra/`.
+    html_pkg = esc + r"(?=[/@'\"])"
+    if is_html and re.search(
+        rf"<script[^>]*src\s*=\s*['\"][^'\"]*/{html_pkg}", content
+    ):
+        return "html_script"
+    if is_html and re.search(rf"<link[^>]*href\s*=\s*['\"][^'\"]*/{html_pkg}", content):
+        return "html_link"
+    # TypeScript triple-slash
+    if is_ts and re.search(
+        rf"///\s*<reference\s+types\s*=\s*['\"]{esc}['\"]", content
+    ):
+        return "tsc_triple_slash"
+    # Fallback: any quoted occurrence in a script-like or JSON file is
+    # conservatively treated as a reference (tsconfig paths, vite alias
+    # keys, `declare module "pkg"` blocks, ...).
+    if (is_script or is_json) and re.search(rf"['\"]{esc}{sub}['\"]", content):
+        return "string_literal"
+    return None
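+
+
+# Illustrative spot-checks (these mirror the unit cases in
+# tests/studio/test_frontend_dep_removal.py):
+#   classify("katex", "src/a.css", '@import "katex";')        -> "css_import"
+#   classify("foo", "src/a.ts", 'import x from "foobar";')    -> None (boundary)
+#   classify("react", "tests/a.py", 'import react')           -> None (file gate)
+#   classify("lodash", "src/a.ts",
+#            'export { default } from "lodash";')             -> "re_export"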
+
+
+def lockfile_root_sync(head_pkg: dict, head_lock: dict) -> list[str]:
+    """Return a list of warnings if package-lock.json's dep map
+    disagrees with package.json (i.e., npm install was not re-run).
+    """
+    warnings = []
+    if not head_lock:
+        return warnings
+    root = head_lock.get("packages", {}).get("", {})
+    lock_decl = {
+        **(root.get("dependencies") or {}),
+        **(root.get("devDependencies") or {}),
+        **(root.get("peerDependencies") or {}),
+        **(root.get("optionalDependencies") or {}),
+    }
+    pkg_decl = {}
+    for f in DEP_FIELDS:
+        pkg_decl.update(head_pkg.get(f) or {})
+    only_in_lock = set(lock_decl) - set(pkg_decl)
+    only_in_pkg = set(pkg_decl) - set(lock_decl)
+    if only_in_lock:
+        warnings.append(
+            f"lockfile lists deps not in package.json (lockfile stale): {sorted(only_in_lock)}"
+        )
+    if only_in_pkg:
+        warnings.append(
+            f"package.json declares deps not in lockfile (run npm install): {sorted(only_in_pkg)}"
+        )
+    return warnings
+
+
+def types_orphan_warnings(head_pkg: dict) -> list[str]:
+    """Flag @types/<X> deps where <X> is no longer declared anywhere
+    in package.json. Removing X without also dropping @types/X leaves
+    dangling type packages.
+    """
+    decl = set()
+    for f in DEP_FIELDS:
+        decl.update((head_pkg.get(f) or {}).keys())
+    warnings = []
+    for name in decl:
+        if not name.startswith("@types/"):
+            continue
+        # @types/foo        provides types for `foo`
+        # @types/foo-bar    provides types for `foo-bar`
+        # @types/scope__pkg provides types for `@scope/pkg`
+        target = name[len("@types/") :]
+        if "__" in target:
+            scope, sub = target.split("__", 1)
+            target = f"@{scope}/{sub}"
+        if target == "node":
+            continue  # Node.js types are always implicit
+        if target not in decl:
+            warnings.append(
+                f"@types/{target.replace('@', '').replace('/', '__')} present but '{target}' is not declared"
+            )
+    return warnings
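+
+
+# Example of the hygiene signal this produces: dropping `js-yaml` while
+# keeping `@types/js-yaml` (exercised by test case C18) yields
+#   "@types/js-yaml present but 'js-yaml' is not declared"
+# as a non-fatal warning unless --strict is set. The scope-encoding rule
+# means `@types/babel__core` pairs with `@babel/core` the same way.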
+
+
+_PKG_JSON_SKIP_KEYS = {
+    "dependencies",
+    "devDependencies",
+    "peerDependencies",
+    "optionalDependencies",
+    "bundleDependencies",
+    "bundledDependencies",
+}
+
+# Top-level fields whose contents are never package references. We walk
+# everything else recursively.
+_PKG_JSON_OPAQUE_KEYS = {
+    "browserslist",    # browser queries
+    "keywords",        # free-form strings
+    "engines",         # node/npm version constraints
+    "engineStrict",    # bool
+    "packageManager",  # `pnpm@9.0.0` -- the package manager binary
+    "volta",           # version pins for node/npm/yarn
+    "files",           # paths included in publish
+    "directories",     # paths
+    "publishConfig",   # registry / access config
+    "config",          # generic npm config values
+    "main",
+    "module",
+    "browser",
+    "types",
+    "typings",
+    "type",
+    "exports",
+    "imports",
+    "bin",
+    "man",             # author-side fields (not consumer refs)
+    "scripts",         # handled separately via scripts_bin_refs()
+    "repository",
+    "bugs",
+    "homepage",
+    "funding",
+    "author",
+    "contributors",
+    "maintainers",
+    "license",
+    "licenses",
+    "name",
+    "version",
+    "description",
+    "private",
+    "sideEffects",
+    "workspaces",      # paths/globs, NOT pkg names
+}
+
+
+def package_json_extra_refs(pkg: dict, target: str) -> list[str]:
+    """Walk every key/value in package.json EXCEPT the dep declaration
+    blocks, and return citations for string values or dict keys that
+    equal `target` (or `target/subpath`).
+
+    Catches the patterns the public dep-checker tools commonly miss:
+      - `overrides` / `resolutions` / `pnpm.overrides` keys
+      - `pnpm.patchedDependencies` keys
+      - `peerDependenciesMeta` keys
+      - `prettier`: "@my/prettier-config"
+      - `eslintConfig.extends`: ["..."] / "..."
+      - `stylelint.extends` / `stylelint.plugins`
+      - `babel.presets` / `babel.plugins`
+      - `jest.preset` / `jest.setupFiles` / `jest.transform`
+      - `commitlint.extends`, `renovate.extends`, `remarkConfig.plugins`
+    """
+    target_sub = target + "/"
+    cites: list[str] = []
+
+    def matches(s: object) -> bool:
+        return isinstance(s, str) and (s == target or s.startswith(target_sub))
+
+    def walk(obj: object, path: str) -> None:
+        if isinstance(obj, dict):
+            for k, v in obj.items():
+                # Skip top-level dep declaration fields entirely.
+                if path == "" and k in _PKG_JSON_SKIP_KEYS:
+                    continue
+                # Top-level fields whose contents are never package refs.
+                if path == "" and k in _PKG_JSON_OPAQUE_KEYS:
+                    continue
+                # Inside `overrides` / `resolutions` / etc., the KEY itself
+                # is a package reference.
+                if matches(k):
+                    cites.append(f"{path}.{k}" if path else k)
+                walk(v, f"{path}.{k}" if path else k)
+        elif isinstance(obj, list):
+            for i, v in enumerate(obj):
+                walk(v, f"{path}[{i}]")
+        elif isinstance(obj, str):
+            if matches(obj):
+                cites.append(f"{path}: {obj}")
+
+    walk(pkg, "")
+    return cites
+
+
+def build_bin_to_pkg(head_lock: dict) -> dict[str, str]:
+    """Map a binary name (e.g. 'vite', 'tsc', 'eslint') to the package
+    that provides it. Built from each lockfile entry's `bin` field.
+    """
+    out: dict[str, str] = {}
+    if not head_lock:
+        return out
+    for path, meta in head_lock.get("packages", {}).items():
+        if not path:
+            continue
+        name = path.split("node_modules/")[-1]
+        bins = meta.get("bin")
+        if isinstance(bins, dict):
+            for binname in bins:
+                out.setdefault(binname, name)
+        elif isinstance(bins, str):
+            out.setdefault(name.split("/")[-1], name)
+    return out
+
+
+_SCRIPT_TOKENIZE = re.compile(r"\s*(?:&&|\|\||;|\|(?!\|))\s*")
+
+# Wrappers that delegate to a real CLI in the same shell word list.
+# After stripping env prefixes and (optionally) `npx`/`pnpm exec`/`yarn dlx`/
+# `bunx`, if the leading token is one of these we advance past the
+# wrapper's own flags and any further env-prefix tokens, then re-check.
+# `cross-env` is the common one; `dotenv-cli` / `dotenvx` use `--` as a
+# separator. Wrappers that operate on named npm-scripts (concurrently,
+# npm-run-all, run-s, run-p, wireit, turbo, nx) intentionally aren't
+# here -- they reference script names, not bin names, so the real bin
+# is in the *target* script's chunk, which we already tokenize.
+_SCRIPT_WRAPPERS = {"cross-env", "dotenv", "dotenvx", "env-cmd"}
+_ENV_PREFIX_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
+
+
+def _next_real_bin(words: list[str], idx: int) -> str | None:
+    """Walk `words` from `idx`, peeling env-prefix tokens, the leading
+    package-manager runner (`npx`, `pnpm exec`, etc.), and the known
+    wrapper bins. Return the next token that looks like the real CLI
+    binary, or None if the chunk has nothing to look up.
+
+    Every pass through the loop advances `idx`, so the scan is bounded
+    by the chunk's word count and cannot run away on a pathological
+    wrapper chain.
+    """
+    seen_wrappers: set[str] = set()
+    while idx < len(words):
+        # 1. env-prefix run: `FOO=bar BAZ="a b" cmd ...`. shlex has
+        #    already collapsed quoted values into one word, so this
+        #    tokenizer is safe for them.
+        while idx < len(words) and _ENV_PREFIX_RE.match(words[idx]):
+            idx += 1
+        if idx >= len(words):
+            return None
+
+        first = words[idx]
+        # 2. Package-manager runner: `npx <bin> args`, `pnpm exec <bin>`,
+        #    `yarn dlx <pkg>`, `bunx <bin>`. Strip and continue (so the
+        #    wrapped command goes through the same unwrap loop).
+        if first in {"npx", "pnpx", "bunx"} and idx + 1 < len(words):
+            idx += 1
+            continue
+        if (
+            first in {"pnpm", "yarn"}
+            and idx + 2 < len(words)
+            and words[idx + 1] in {"exec", "dlx"}
+        ):
+            idx += 2
+            continue
+
+        # 3. Wrapper bin (cross-env, dotenv, etc.). Skip the wrapper's
+        #    own flags and any subsequent env-prefix tokens, then re-loop.
+        bin_token = first.removeprefix("./node_modules/.bin/").removeprefix(
+            "node_modules/.bin/"
+        )
+        if bin_token in _SCRIPT_WRAPPERS and bin_token not in seen_wrappers:
+            seen_wrappers.add(bin_token)
+            idx += 1
+            # cross-env / env-cmd: no flags; just more env-prefix tokens.
+            # dotenv / dotenvx: skip `-e <file>` style flags and the
+            # optional `--` separator before the wrapped command.
+            while idx < len(words):
+                tok = words[idx]
+                if tok.startswith("-") and tok != "--":
+                    idx += 1
+                    # `-e .env` style: also skip the flag's argument
+                    # when it does not look like another flag.
+                    if (
+                        idx < len(words)
+                        and not words[idx].startswith("-")
+                        and not _ENV_PREFIX_RE.match(words[idx])
+                    ):
+                        idx += 1
+                    continue
+                if tok == "--":
+                    idx += 1
+                    break
+                break
+            continue
+        return bin_token
+    return None
+
+
+def scripts_bin_refs(
+    head_pkg: dict, bin_to_pkg: dict[str, str]
+) -> dict[str, list[str]]:
+    """Return `{package_name: ['scripts.X: cmd', ...]}` listing every
+    package referenced via its bin name in package.json scripts.
+
+    Each script value is split on shell separators (`&&`, `||`, `;`,
+    `|`). Within each chunk, _next_real_bin() unwraps env prefixes,
+    package-manager runners (`npx` / `pnpm exec` / `yarn dlx` / `bunx`),
+    and wrapper bins like `cross-env` / `dotenv` so that
+    `cross-env CI=1 biome check` correctly credits `biome` to its
+    declaring package.
+
+    Tokenization uses shlex.split so quoted env values
+    (`FOO="a b" biome`) survive unbroken.
+    """
+    import shlex
+
+    scripts = head_pkg.get("scripts", {}) or {}
+    refs: dict[str, list[str]] = {}
+    for script_name, raw_cmd in scripts.items():
+        if not isinstance(raw_cmd, str):
+            continue
+        for chunk in _SCRIPT_TOKENIZE.split(raw_cmd):
+            chunk = chunk.strip()
+            if not chunk:
+                continue
+            try:
+                words = shlex.split(chunk, posix = True)
+            except ValueError:
+                # Unbalanced quotes -- fall back to plain split.
+                words = chunk.split()
+            if not words:
+                continue
+            bin_name = _next_real_bin(words, 0)
+            if bin_name is None:
+                continue
+            pkg = bin_to_pkg.get(bin_name)
+            if pkg:
+                refs.setdefault(pkg, []).append(f"scripts.{script_name}: {raw_cmd}")
+    return refs
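+
+
+# Spot-checks of the unwrap chain (hand-checked against the loop above):
+#   _next_real_bin(["CI=1", "npx", "vite", "build"], 0)         -> "vite"
+#   _next_real_bin(["cross-env", "NODE_ENV=test", "biome"], 0)  -> "biome"
+#   _next_real_bin(["dotenv", "-e", ".env", "--", "tsc"], 0)    -> "tsc"
+#   _next_real_bin(["FOO=1", "BAR=2"], 0)                       -> None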
+
+
+def tsconfig_compiler_types_refs() -> set[str]:
+    """Read studio/frontend/tsconfig*.json and return the set of
+    package names referenced in compilerOptions.types arrays. These are
+    implicitly loaded by tsc and count as a real use even though they
+    have no explicit import.
+    """
+    out: set[str] = set()
+    base = REPO_ROOT / "studio/frontend"
+    for name in ("tsconfig.json", "tsconfig.app.json", "tsconfig.node.json"):
+        path = base / name
+        if not path.exists():
+            continue
+        try:
+            text = path.read_text()
+            # tsconfig allows comments; strip simple line comments.
+            text = re.sub(r"//[^\n]*", "", text)
+            data = json.loads(text)
+        except (OSError, json.JSONDecodeError):
+            continue
+        types = (data.get("compilerOptions", {}) or {}).get("types", []) or []
+        for t in types:
+            if not isinstance(t, str):
+                continue
+            # `vite/client` resolves to the `vite` package.
+            pkg = (
+                t.split("/", 1)[0]
+                if not t.startswith("@")
+                else "/".join(t.split("/", 2)[:2])
+            )
+            out.add(pkg)
+    return out
+
+
+def enumerate_dep_usage(head_pkg: dict, head_lock: dict) -> dict[str, list]:
+    """For every declared dep, classify whether it appears used. Returns
+    a dict with these categories:
+      - used:            has at least one detected usage in src/,
+                         config files, scripts.bin, package.json
+                         field refs, or tsconfig types
+      - unused:          no detected usage anywhere
+      - type_pkg_kept:   @types/X where X is still declared
+      - type_pkg_orphan: @types/X where X is no longer declared
+                         (or X is removed) -- candidate for removal
+
+    Each entry is the package name. The categorisation is opinionated;
+    `unused` is a CANDIDATE list, not a guarantee. The caller should
+    verify before deletion.
+    """
+    decl = all_decl_names(head_pkg)
+    bin_to_pkg = build_bin_to_pkg(head_lock) if head_lock else {}
+    script_refs = scripts_bin_refs(head_pkg, bin_to_pkg)
+    tsc_types = tsconfig_compiler_types_refs()
+
+    results: dict[str, list] = {
+        "used": [],
+        "unused": [],
+        "type_pkg_kept": [],
+        "type_pkg_orphan": [],
+    }
+    for name in sorted(decl):
+        if name.startswith("@types/"):
+            target = name[len("@types/") :]
+            if "__" in target:
+                scope, sub = target.split("__", 1)
+                target = f"@{scope}/{sub}"
+            if target == "node":
+                results["type_pkg_kept"].append(name)
+            elif target in decl:
+                results["type_pkg_kept"].append(name)
+            else:
+                results["type_pkg_orphan"].append(name)
+            continue
+        # Real-source-usage check
+        hits = find_usage(name)
+        used = bool(hits)
+        # CLI usage in shell / workflow / Dockerfile surfaces. Skip for
+        # `@types/*` packages because they never expose a CLI binary and
+        # the unscoped-tail bin name candidate would scan workflow files
+        # for the bare runtime name (a removed `@types/foo` would look
+        # for invocations of `foo`).
+        if not used and not name.startswith("@types/") and find_command_usage(name):
+            used = True
+        # Bin scripts
+        if not used and name in script_refs:
+            used = True
+        # package.json non-dep field references
+        if not used and package_json_extra_refs(head_pkg, name):
+            used = True
+        # tsconfig compilerOptions.types implicit usage
+        if not used and name in tsc_types:
+            used = True
+        if used:
+            results["used"].append(name)
+        else:
+            results["unused"].append(name)
+    return results
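+
+
+# Shape of enumerate_dep_usage()'s return value (illustrative names only;
+# the real lists depend on the working tree):
+#   {
+#       "used": ["react", "vite", ...],
+#       "unused": ["some-leftover-pkg"],
+#       "type_pkg_kept": ["@types/node"],
+#       "type_pkg_orphan": ["@types/js-yaml"],
+#   }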
+
+
+def find_imports_without_decl(head_pkg: dict) -> list[tuple[str, int, str]]:
+    """Reverse check: find bare-specifier imports in studio/frontend/src
+    that don't correspond to any declared package.json dep. Catches the
+    case where someone adds an import but forgets the dep declaration.
+    Returns (file, line, spec) tuples.
+
+    Match shapes covered:
+        import "pkg"
+        import Foo from "pkg"
+        import { Foo } from "pkg"
+        import type { Foo } from "pkg"
+        const x = require("pkg")
+        const x = await import("pkg")
+    """
+    decl = set()
+    for f in DEP_FIELDS:
+        decl.update((head_pkg.get(f) or {}).keys())
+    # Also: anything tsconfig path-aliases (just '@/...' here) is internal.
+    # The capture group is the specifier; the leading alternation accepts
+    # any of: `from "..."`, bare side-effect `import "..."`,
+    # `import("...")`, or `require("...")`. We exclude relative paths by
+    # requiring the first char of the specifier to be neither `.` nor `/`;
+    # the `@/` alias prefix is filtered below.
+    pattern = (
+        r"(?:\bfrom\s+|"
+        r"\bimport\s+(?:\(\s*)?|"
+        r"\brequire(?:\.resolve)?\(\s*)"
+        r"['\"]([^'\"./][^'\"]*)['\"]"
+    )
+    args = [
+        "grep",
+        "-rnE",
+        pattern,
+        "--include=*.ts",
+        "--include=*.tsx",
+        "--include=*.js",
+        "--include=*.jsx",
+        "studio/frontend/src",
+    ]
+    out = run(args)
+    missing = []
+    for line in out.splitlines():
+        m = re.match(r"^(?:\./)?([^:]+):(\d+):(.*)$", line)
+        if not m:
+            continue
+        file, ln, content = m.group(1), int(m.group(2)), m.group(3)
+        for spec_match in re.finditer(pattern, content):
+            spec = spec_match.group(1)
+            # Resolve to package name (strip subpath)
+            if spec.startswith("@"):
+                parts = spec.split("/", 2)
+                pkg_name = "/".join(parts[:2]) if len(parts) >= 2 else spec
+            else:
+                pkg_name = spec.split("/", 1)[0]
+            if pkg_name in decl:
+                continue
+            # Internal aliases like '@/foo' (the tsconfig path alias).
+            if pkg_name == "@" or pkg_name.startswith("@/"):
+                continue
+            # Node builtins are never package.json deps.
+            if pkg_name in {
+                "node:fs",
+                "node:path",
+                "fs",
+                "path",
+                "url",
+                "stream",
+                "crypto",
+                "buffer",
+                "util",
+                "events",
+                "child_process",
+            }:
+                continue
+            missing.append((file, ln, spec))
+    return missing
+
+
+def grep_repo(pat: str) -> list[tuple[str, int, str]]:
+    args = ["grep", "-rnE", pat] + GREP_INCLUDES + GREP_EXCLUDES + ["."]
+    out = run(args)
+    rows = []
+    for line in out.splitlines():
+        m = re.match(r"^(\./)?([^:]+):(\d+):(.*)$", line)
+        if m:
+            rows.append((m.group(2), int(m.group(3)), m.group(4)))
+    return rows
+
+
+_file_lines_cache: dict[str, list[str]] = {}
+
+
+def _read_file(path: str) -> list[str]:
+    if path not in _file_lines_cache:
+        try:
+            _file_lines_cache[path] = (
+                Path(path).read_text(errors = "replace").splitlines()
+            )
+        except (OSError, UnicodeDecodeError):
+            _file_lines_cache[path] = []
+    return _file_lines_cache[path]
+
+
+def find_usage(pkg: str) -> list[Hit]:
+    """Return real usages of `pkg`. Filters pip-playwright separately.
+
+    For each filename returned by grep, also feed a multi-line window
+    around the matching line into classify() so multi-line imports
+    (`import {\\n  a\\n} from "pkg"`) get picked up.
+    """
+    rows = grep_repo(re.escape(pkg))
+    hits = []
+    seen_keys: set[tuple[str, str]] = set()
+    for file, lineno, content in rows:
+        if pkg == "playwright" and PIP_PLAYWRIGHT.search(content):
+            continue
+        # Try the single-line classify first.
+        kind = classify(pkg, file, content)
+        if not kind:
+            # Multi-line window: a generous 25 lines above + the line +
+            # 25 below so Prettier's one-import-per-line formatting for
+            # 12-20+ named imports still includes the `import` keyword
+            # in the same window as the `from "pkg"` clause.
+            lines = _read_file(file)
+            lo = max(0, lineno - 26)
+            hi = min(len(lines), lineno + 25)
+            window = "\n".join(lines[lo:hi])
+            kind = classify(pkg, file, window)
+        if kind:
+            key = (file, kind)
+            if key in seen_keys:
+                continue
+            seen_keys.add(key)
+            hits.append(Hit(file, lineno, kind, content[:160]))
+    return hits
+
+
+def _candidate_bin_names(pkg: str) -> set[str]:
+    """Names a removed package's CLI could be invoked under in shell
+    scripts and workflow files. Most npm CLIs use the package name
+    (`vite`, `eslint`, `playwright`); scoped CLI packages commonly
+    expose an unscoped binary name (`@biomejs/biome` -> `biome`).
+    """
+    return {pkg, pkg.rsplit("/", 1)[-1]}
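+
+
+# e.g. _candidate_bin_names("@biomejs/biome") == {"@biomejs/biome", "biome"}
+# and _candidate_bin_names("vite") == {"vite"}; the scope-stripped tail is
+# the name scripts actually invoke.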
+
+
+def find_command_usage(pkg: str) -> list[Hit]:
+    """Find package CLI invocations in shell / workflow / Dockerfile
+    surfaces: `npx pkg`, `bunx pkg`, `pnpm exec pkg`, `yarn dlx pkg`,
+    or a bare `pkg --flag`. Returns Hit("command_bin").
+
+    Detection is bounded to COMMAND_LIKE_EXT files so a JS string that
+    happens to contain `npx foo` inside a TS test fixture is not
+    mistaken for a real invocation.
+    """
+    bins = sorted(_candidate_bin_names(pkg), key = len, reverse = True)
+    esc_bins = "|".join(re.escape(b) for b in bins)
+    # grep ERE pattern (POSIX classes for whitespace/word boundaries).
+    # Build without f-strings to avoid f-string-vs-{} confusion with the
+    # POSIX `[[:space:]]` literals and trailing `})}` boundary class.
+    grep_pat = (
+        r"(^|[[:space:]:;&|(\[])"
+        r"(npx[[:space:]]+|pnpm[[:space:]]+exec[[:space:]]+"
+        r"|yarn[[:space:]]+(dlx[[:space:]]+)?|bunx[[:space:]]+)?"
+        r"(" + esc_bins + r")"
+        r"([[:space:])};|\]]|$)"
+    )
+    py_pat = re.compile(
+        r"(^|[\s:;&|(\[])"
+        r"(?:npx\s+|pnpm\s+exec\s+|yarn\s+(?:dlx\s+)?|bunx\s+)?"
+        r"(" + esc_bins + r")"
+        r"([\s)};|\]]|$)"
+    )
+    hits: list[Hit] = []
+    seen: set[tuple[str, int]] = set()
+    for file, lineno, content in grep_repo(grep_pat):
+        if not COMMAND_LIKE_EXT.search(file):
+            continue
+        if pkg == "playwright" and PIP_PLAYWRIGHT.search(content):
+            continue
+        if not py_pat.search(content):
+            continue
+        key = (file, lineno)
+        if key in seen:
+            continue
+        seen.add(key)
+        hits.append(Hit(file, lineno, "command_bin", content[:160]))
+    return hits
+
+
+def types_target_name(pkg: str) -> str | None:
+    """Strip the `@types/` prefix and decode the npm scope-encoding so the
+    return value matches the runtime package name. `@types/foo` -> `foo`,
+    `@types/foo__bar` -> `@foo/bar`. Returns None for non-@types packages.
+    """
+    if not pkg.startswith("@types/"):
+        return None
+    target = pkg[len("@types/") :]
+    if "__" in target:
+        scope, sub = target.split("__", 1)
+        return f"@{scope}/{sub}"
+    return target
+
+
+def find_types_runtime_usage(pkg: str, tsc_types: set[str]) -> list[Hit]:
+    """For a removed `@types/X`, find usages of `X` itself: explicit
+    `/// <reference types="X" />`, `tsconfig.compilerOptions.types: ["X"]`,
+    and runtime `import "X"` shapes. The whole point of `@types/X` is to
+    type one of those; if any are present, the type package must stay.
+    """
+    target = types_target_name(pkg)
+    if target is None:
+        return []
+    hits = find_usage(target)
+    if target in tsc_types:
+        hits.append(
+            Hit(
+                "studio/frontend/tsconfig*.json",
+                0,
+                "tsconfig_types",
+                f'compilerOptions.types includes "{target}"',
+            )
+        )
+    return hits
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(
+        description = __doc__, formatter_class = argparse.RawTextHelpFormatter
+    )
+    p.add_argument(
+        "--base",
+        default = "origin/main",
+        help = "git ref to diff against (default: origin/main). "
+        "Examples: HEAD~1, main, a-tag, a-sha.",
+    )
+    p.add_argument(
+        "--base-pkg", help = "optional override: read base package.json from this path"
+    )
+    p.add_argument(
+        "--base-lock",
+        help = "optional override: read base package-lock.json from this path. "
+        "Used to recover the bin -> package mapping for removed packages so "
+        "scripts.foo still flags as a usage even after the PR drops node_modules/foo.",
+    )
+    p.add_argument(
+        "--head-pkg",
+        default = str(REPO_ROOT / FRONTEND_PKG),
+        help = "head package.json path (default: working tree)",
+    )
+    p.add_argument(
+        "--head-lock",
+        default = str(REPO_ROOT / FRONTEND_LOCK),
+        help = "head lockfile path (default: working tree). "
+        "Reachability analysis runs against this lockfile.",
+    )
+    p.add_argument("--verbose", action = "store_true")
+    p.add_argument(
+        "--strict",
+        action = "store_true",
+        help = "Also fail on hygiene warnings (lockfile sync, "
+        "@types orphans, imports without declared dep, unused deps).",
+    )
+    p.add_argument(
+        "--enumerate-dead",
+        action = "store_true",
+        help = "Print every declared dep that appears unused anywhere "
+        "in the repo. Informational; does not fail unless --strict.",
+    )
+    args = p.parse_args()
+
+    if args.base_pkg:
+        base_pkg = read_pkg_file(Path(args.base_pkg))
+    else:
+        base_pkg = read_pkg_at(args.base, FRONTEND_PKG)
+    head_pkg = read_pkg_file(Path(args.head_pkg))
+    if not base_pkg:
+        print(
+            f"ERROR: could not read base package.json at {args.base}:{FRONTEND_PKG}",
+            file = sys.stderr,
+        )
+        return 2
+    if not head_pkg:
+        print(
+            f"ERROR: could not read head package.json at {args.head_pkg}",
+            file = sys.stderr,
+        )
+        return 2
+
+    head_lock_path = Path(args.head_lock)
+    if not head_lock_path.exists():
+        print(
+            f"ERROR: head lockfile not found at {head_lock_path}",
+            file = sys.stderr,
+        )
+        return 2
+    head_lock = read_pkg_file(head_lock_path)
+
+    # Base lockfile is best-effort. We use it only to recover the
+    # bin -> package mapping for packages the PR is removing -- so a
+    # `scripts.biome:check` cite still fires when `@biomejs/biome` is
+    # being dropped and the head lockfile no longer has it.
+    if args.base_lock:
+        base_lock_path = Path(args.base_lock)
+        base_lock = read_pkg_file(base_lock_path) if base_lock_path.exists() else {}
+    else:
+        base_lock = read_pkg_at(args.base, FRONTEND_LOCK)
+
+    base_names = all_decl_names(base_pkg)
+    head_names = all_decl_names(head_pkg)
+    removed = sorted(base_names - head_names)
+
+    # All hygiene checks are computed up front so they run on both the
+    # removal-present and removal-empty paths (so `--strict` actually
+    # fails when only hygiene issues exist).
+    sync_warns = lockfile_root_sync(head_pkg, head_lock)
+    types_warns = types_orphan_warnings(head_pkg)
+    missing_imports = find_imports_without_decl(head_pkg)
+    enum = enumerate_dep_usage(head_pkg, head_lock) if args.enumerate_dead else None
+
+    def _print_hygiene() -> None:
+        if sync_warns:
+            print("Lockfile sync warnings:")
+            for w in sync_warns:
+                print(f"  - {w}")
+            print()
+        if types_warns:
+            print("@types orphan warnings:")
+            for w in types_warns:
+                print(f"  - {w}")
+            print()
+        if missing_imports:
+            print(
+                f"Imports without a matching package.json dep ({len(missing_imports)}):"
+            )
+            for file, ln, spec in missing_imports[:20]:
+                print(f"  - {file}:{ln} imports '{spec}'")
+            print()
+        if enum is not None:
+            print("Dead-dep enumeration:")
+            if enum["unused"]:
+                print(f"  unused ({len(enum['unused'])}):")
+                for n in enum["unused"]:
+                    print(f"    - {n}")
+            else:
+                print("  unused: none")
+            if enum["type_pkg_orphan"]:
+                print(f"  type_pkg_orphan ({len(enum['type_pkg_orphan'])}):")
+                for n in enum["type_pkg_orphan"]:
+                    print(f"    - {n}")
+            if args.verbose:
+                print(f"  used: {len(enum['used'])}")
+                print(f"  type_pkg_kept: {len(enum['type_pkg_kept'])}")
+            print()
+
+    hygiene_strict_fail = args.strict and (
+        sync_warns
+        or types_warns
+        or missing_imports
+        or (enum is not None and (enum["unused"] or enum["type_pkg_orphan"]))
+    )
+
+    if not removed:
+        print("[OK] no dependencies removed from studio/frontend/package.json")
+        if args.enumerate_dead or sync_warns or types_warns or missing_imports:
+            print()
+            _print_hygiene()
+        if hygiene_strict_fail:
+            print("FAIL (--strict): one or more hygiene warnings present")
+            return 1
+        return 0
+
+    print(
+        f"Checking {len(removed)} removed package(s) from studio/frontend/package.json"
+    )
+    print(f"Base: {args.base}  Head: working tree")
+    print()
+
+    reachable_paths = reachable_from_head(head_pkg, head_lock) if head_lock else set()
+    # bin -> package map: start from the head lockfile, then layer the
+    # base lockfile's entries on top for packages this PR is removing.
+    # A correct removal updates the head lockfile to drop node_modules/foo,
+    # so build_bin_to_pkg(head_lock) loses the mapping; we recover it
+    # from the base lockfile so `scripts.biome:check` still flags as a
+    # usage when `@biomejs/biome` is being dropped.
+    bin_to_pkg = build_bin_to_pkg(head_lock) if head_lock else {}
+    base_bin_to_pkg = build_bin_to_pkg(base_lock) if base_lock else {}
+    removed_set = set(removed)
+    for bin_name, pkg_name in base_bin_to_pkg.items():
+        if pkg_name in removed_set:
+            bin_to_pkg.setdefault(bin_name, pkg_name)
+    script_refs = scripts_bin_refs(head_pkg, bin_to_pkg)
+    tsc_types = tsconfig_compiler_types_refs()
+
+    def reachable_install_paths(name: str) -> tuple[str | None, list[str]]:
+        """Return (top_level_path, nested_paths). top_level is what bare
+        `import "name"` from src/ actually resolves to; nested copies are
+        only visible inside the parent package that nested them.
+        """
+        top = f"node_modules/{name}"
+        top_path = top if top in reachable_paths else None
+        nested = sorted(
+            p
+            for p in reachable_paths
+            if p != top and p.endswith(f"/node_modules/{name}")
+        )
+        return top_path, nested
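+
+    # Illustrative outcome (hypothetical tree state): if zustand is kept
+    # top-level by @assistant-ui/react and also nested under @xyflow/react,
+    # reachable_install_paths("zustand") returns
+    #   ("node_modules/zustand",
+    #    ["node_modules/@xyflow/react/node_modules/zustand"])
+    # -- only the first element lets a bare `import "zustand"` from src/
+    # resolve.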
+
+    failures: list[tuple[str, list[Hit]]] = []
+    for name in removed:
+        hits = find_usage(name)
+        # CLI invocations in shell scripts / workflows / Dockerfiles.
+        hits.extend(find_command_usage(name))
+        # @types/X is "used" if X is referenced as a type or as a
+        # runtime import elsewhere in the repo.
+        hits.extend(find_types_runtime_usage(name, tsc_types))
+        for cite in script_refs.get(name, []):
+            hits.append(Hit("studio/frontend/package.json", 0, "script_bin", cite))
+        for cite in package_json_extra_refs(head_pkg, name):
+            hits.append(Hit("studio/frontend/package.json", 0, "pkg_json_field", cite))
+        top, nested = reachable_install_paths(name)
+        importable_top_level = top is not None
+        # Source imports of bare specifier `name` resolve ONLY to top-level
+        # node_modules/<name>. Nested copies under another package are
+        # invisible to src/ files.
+        if hits and not importable_top_level:
+            status = "FAIL"
+        elif hits and importable_top_level:
+            status = "OK-via-transitive"
+        else:
+            status = "OK"
+        print(f"  [{status}] {name}")
+        if top:
+            print(f"    reachable (top-level): {top}")
+        if nested:
+            print(
+                f"    reachable (nested, NOT importable from src/): {nested[0]}"
+                + (f" (+{len(nested)-1} more)" if len(nested) > 1 else "")
+            )
+        if hits:
+            for h in hits[:5]:
+                print(f"    [{h.kind}] {h.file}:{h.line}  {h.snippet}")
+            if status == "FAIL":
+                failures.append((name, hits))
+        if args.verbose and not hits and not (top or nested):
+            print("    no references, not reachable -- clean removal")
+
+    print()
+
+    _print_hygiene()
+
+    if failures:
+        print(
+            f"FAIL: {len(failures)} removed package(s) still referenced and not resolvable"
+        )
+        for name, _ in failures:
+            print(f"  - {name}")
+        return 1
+    if hygiene_strict_fail:
+        print("FAIL (--strict): one or more hygiene warnings present")
+        return 1
+
+    print("PASS: all removed packages are safe to drop")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py
new file mode 100644
index 000000000..a8e4cda8b
--- /dev/null
+++ b/tests/studio/test_frontend_dep_removal.py
@@ -0,0 +1,1628 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+"""Edge-case suite for scripts/check_frontend_dep_removal.py.
+
+Each case patches a copy of studio/frontend/package.json to remove (or
+move) a specific dependency, invokes the checker against the real
+working tree's lockfile, and asserts the verdict matches expectations.
+
+Run:
+    python tests/studio/test_frontend_dep_removal.py
+
+Exits 0 iff every case behaves as expected.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[2]
+HEAD_PKG = REPO / "studio/frontend/package.json"
+HEAD_LOCK = REPO / "studio/frontend/package-lock.json"
+SCRIPT = REPO / "scripts/check_frontend_dep_removal.py"
+
+
+@dataclass
+class Case:
+    id: str
+    desc: str
+    remove: list[str]
+    expected_status: str  # "PASS" | "FAIL"
+    expected_failures: list[str]
+    move_to_dev: list[str] | None = None  # rare: deps moved, not removed
+
+
+CASES: list[Case] = [
+    Case(
+        "C1",
+        "removing next-themes breaks 2 src imports",
+        ["next-themes"],
+        "FAIL",
+        ["next-themes"],
+    ),
+    Case(
+        "C2",
+        "removing @xyflow/react breaks recipe-studio src imports "
+        "(no other declared dep pulls @xyflow/react)",
+        ["@xyflow/react"],
+        "FAIL",
+        ["@xyflow/react"],
+    ),
+    Case(
+        "C3",
+        "removing katex is safe: streamdown/math, mermaid, "
+        "rehype-katex all keep it at top level",
+        ["katex"],
+        "PASS",
+        [],
+    ),
+    Case("C4", "removing clsx is safe: streamdown keeps it", ["clsx"], "PASS", []),
+    Case(
+        "C5",
+        "removing react is safe: peer of countless packages",
+        ["react"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C6",
+        "removing @radix-ui/react-slot is safe: pulled by "
+        "radix-ui umbrella + @assistant-ui/react",
+        ["@radix-ui/react-slot"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C7",
+        "removing zustand is safe: @assistant-ui/react keeps "
+        "top-level zustand@5.x (nested xyflow 4.x is irrelevant "
+        "to src imports)",
+        ["zustand"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C8",
+        "multi-remove with mixed safety: next-themes + "
+        "@huggingface/hub + dexie all unsafe",
+        ["next-themes", "@huggingface/hub", "dexie"],
+        "FAIL",
+        ["next-themes", "@huggingface/hub", "dexie"],
+    ),
+    Case(
+        "C9",
+        "removing @huggingface/hub breaks 5+ src imports",
+        ["@huggingface/hub"],
+        "FAIL",
+        ["@huggingface/hub"],
+    ),
+    Case(
+        "C10",
+        "removing tailwind-merge is safe: streamdown keeps it",
+        ["tailwind-merge"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C11",
+        "removing a non-existent name is a no-op",
+        ["__never_existed_in_pkg__"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C12",
+        "moving @hugeicons/react from deps to devDeps is NOT a "
+        "removal (still declared)",
+        [],
+        "PASS",
+        [],
+        move_to_dev = ["@hugeicons/react"],
+    ),
+    Case(
+        "C13",
+        "removing @huggingface/hub AND @xyflow/react together: both "
+        "are root-only deps with no other parents, so both should FAIL",
+        ["@huggingface/hub", "@xyflow/react"],
+        "FAIL",
+        ["@huggingface/hub", "@xyflow/react"],
+    ),
+    Case(
+        "C14",
+        "removing dexie breaks src imports (no other declared "
+        "dep needs it)",
+        ["dexie"],
+        "FAIL",
+        ["dexie"],
+    ),
+    Case(
+        "C15",
+        "removing motion (used in 20+ src imports including "
+        "framer-motion-style animations); no transitive parent",
+        ["motion"],
+        "FAIL",
+        ["motion"],
+    ),
+    Case(
+        "C16",
+        "removing canvas-confetti (imported in confetti.tsx); "
+        "no transitive parent",
+        ["canvas-confetti"],
+        "FAIL",
+        ["canvas-confetti"],
+    ),
+    Case(
+        "C17",
+        "removing recharts (imported in chart.tsx); no transitive "
+        "parent",
+        ["recharts"],
+        "FAIL",
+        ["recharts"],
+    ),
+    Case(
+        "C18",
+        "removing js-yaml is safe: @eslint/eslintrc keeps it "
+        "(triggers @types/js-yaml orphan warning, non-fatal)",
+        ["js-yaml"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C19",
+        "removing node-forge (imported in providers-api.ts); "
+        "no transitive parent",
+        ["node-forge"],
+        "FAIL",
+        ["node-forge"],
+    ),
+    Case(
+        "C20",
+        "removing @tauri-apps/api is safe: all 5 @tauri-apps "
+        "plugins declare it as a direct dep",
+        ["@tauri-apps/api"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C21",
+        "removing mammoth (imported in runtime-provider.tsx); "
+        "no transitive parent",
+        ["mammoth"],
+        "FAIL",
+        ["mammoth"],
+    ),
+    Case(
+        "C22",
+        "removing unpdf (imported in runtime-provider.tsx); "
+        "no transitive parent",
+        ["unpdf"],
+        "FAIL",
+        ["unpdf"],
+    ),
+    Case(
+        "C23",
+        "removing remark-gfm is safe: streamdown declares it "
+        "as a direct dep",
+        ["remark-gfm"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C24",
+        "removing date-fns is safe: react-day-picker and "
+        "@base-ui/react both declare it as a direct dep",
+        ["date-fns"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C25",
+        "removing vite is safe: @vitejs/plugin-react and @tailwindcss/vite "
+        "keep it via peer (bin still resolves)",
+        ["vite"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C26",
+        "removing typescript is safe: 11 transitive @typescript-eslint/* "
+        "parents keep tsc bin alive",
+        ["typescript"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C27",
+        "removing eslint is safe: typescript-eslint and eslint-plugin-* "
+        "peers keep eslint bin alive",
+        ["eslint"],
+        "PASS",
+        [],
+    ),
+    Case(
+        "C28",
+        "removing @biomejs/biome breaks scripts.biome:check / biome:fix "
+        "(no transitive parents, biome bin orphans)",
+        ["@biomejs/biome"],
+        "FAIL",
+        ["@biomejs/biome"],
+    ),
+    Case(
+        "C29",
+        "removing both @biomejs/biome AND @vitejs/plugin-react together: "
+        "biome dies outright; vite loses one of its two retained peers "
+        "but @tailwindcss/vite still keeps it",
+        ["@biomejs/biome", "@vitejs/plugin-react"],
+        "FAIL",
+        ["@biomejs/biome", "@vitejs/plugin-react"],
+    ),
+]
+
+
+def synth_head(head_pkg: dict, case: Case) -> dict:
+    out = json.loads(json.dumps(head_pkg))
+    for name in case.remove:
+        for field in (
+            "dependencies",
+            "devDependencies",
+            "peerDependencies",
+            "optionalDependencies",
+        ):
+            (out.get(field) or {}).pop(name, None)
+    if case.move_to_dev:
+        for name in case.move_to_dev:
+            v = (out.get("dependencies") or {}).pop(name, None)
+            if v is not None:
+                out.setdefault("devDependencies", {})[name] = v
+    return out
+
+
+def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]:
+    synth = synth_head(head_pkg, case)
+    with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+        json.dump(synth, f, indent = 2)
+        synth_path = f.name
+    try:
+        proc = subprocess.run(
+            [
+                sys.executable,
+                str(SCRIPT),
+                "--base-pkg",
+                str(HEAD_PKG),
+                "--head-pkg",
+                synth_path,
+                "--head-lock",
+                str(HEAD_LOCK),
+            ],
+            capture_output = True,
+            text = True,
+        )
+    finally:
+        os.unlink(synth_path)
+
+    actual_status = {0: "PASS", 1: "FAIL"}.get(proc.returncode, f"RC{proc.returncode}")
+    failure_pkgs: list[str] = []
+    in_summary = False
+    for line in proc.stdout.splitlines():
+        if "FAIL:" in line and "removed package" in line:
+            in_summary = True
+            continue
+        if in_summary and line.strip().startswith("- "):
+            failure_pkgs.append(line.strip()[2:])
+
+    ok = actual_status == case.expected_status and set(failure_pkgs) == set(
+        case.expected_failures
+    )
+    return ok, (
+        f"expected: status={case.expected_status} fails={sorted(case.expected_failures)}\n"
+        f"actual:   status={actual_status} fails={sorted(failure_pkgs)}\n"
+        f"--- stdout (first 30 lines) ---\n"
+        + "\n".join(proc.stdout.splitlines()[:30])
+    )
+
+
+# ---------------------------------------------------------------------------
+# Classifier unit tests: feed hand-crafted snippets directly into classify()
+# and assert the returned kind. Covers sneaky import shapes that an
+# adversarial / careless dev might use to obscure a real usage.
+# ---------------------------------------------------------------------------
+
+# Import the script's classify() by file path so this test does not need
+# the package to be installed.
+import importlib.util as _ilu
+
+_spec = _ilu.spec_from_file_location("_dep_check", str(SCRIPT))
+_dep_check = _ilu.module_from_spec(_spec)
+sys.modules["_dep_check"] = _dep_check  # required so @dataclass can resolve annotations
+_spec.loader.exec_module(_dep_check)
+classify = _dep_check.classify
+_next_real_bin = _dep_check._next_real_bin
+scripts_bin_refs = _dep_check.scripts_bin_refs
+
+
+@dataclass
+class ClassifyCase:
+    id: str
+    desc: str
+    pkg: str
+    file: str
+    content: str
+    expected_kind: str | None  # None means "no detection"
+
+
+CLASSIFY_CASES: list[ClassifyCase] = [
+    # Bog-standard shapes
+    ClassifyCase(
+        "U01",
+        "single-line static import",
+        "next-themes",
+        "src/x.tsx",
+        'import { ThemeProvider } from "next-themes";',
+        "static_import",
+    ),
+    ClassifyCase(
+        "U02",
+        "side-effect import",
+        "katex",
+        "src/x.tsx",
+        'import "katex/dist/katex.min.css";',
+        "side_effect_import",
+    ),
+    ClassifyCase(
+        "U03",
+        "dynamic import",
+        "@tauri-apps/api",
+        "src/x.tsx",
+        'const { x } = await import("@tauri-apps/api/window");',
+        "dynamic_import",
+    ),
+    ClassifyCase(
+        "U04",
+        "require()",
+        "lodash",
+        "src/x.js",
+        'const _ = require("lodash");',
+        "require",
+    ),
+    ClassifyCase(
+        "U05",
+        "CSS @import",
+        "tailwindcss",
+        "src/x.css",
+        '@import "tailwindcss";',
+        "css_import",
+    ),
+    # Sneaky shapes
+    ClassifyCase(
+        "U06",
+        "multi-line static import",
+        "next-themes",
+        "src/x.tsx",
+        'import {\n  ThemeProvider,\n  useTheme,\n} from "next-themes";',
+        "static_import",
+    ),
+    ClassifyCase(
+        "U07",
+        "import type",
+        "@huggingface/hub",
+        "src/x.ts",
+        'import type { PipelineType } from "@huggingface/hub";',
+        "static_import",
+    ),
+    ClassifyCase(
+        "U08",
+        "export * from re-export",
+        "@some-org/secrets",
+        "src/x.ts",
+        'export * from "@some-org/secrets";',
+        "re_export",
+    ),
+    ClassifyCase(
+        "U09",
+        "export { x } from re-export",
+        "lodash-es",
+        "src/x.ts",
+        'export { foo, bar } from "lodash-es";',
+        "re_export",
+    ),
+    ClassifyCase(
+        "U10",
+        "export type ... from re-export",
+        "@huggingface/hub",
+        "src/x.ts",
+        'export type { Foo } from "@huggingface/hub";',
+        "re_export",
+    ),
+    ClassifyCase(
+        "U11",
+        "multi-line export from re-export",
+        "@some/pkg",
+        "src/x.ts",
+        'export {\n  thing,\n  other,\n} from "@some/pkg";',
+        "re_export",
+    ),
+    ClassifyCase(
+        "U12",
+        "JSDoc @import",
+        "react",
+        "src/x.ts",
+        '/** @type {import("react").FC} */\nconst Foo = () => null;',
+        "dynamic_import",
+    ),
+    ClassifyCase(
+        "U13",
+        "template literal package path",
+        "@assistant-ui/react",
+        "src/x.tsx",
+        "const url = `@assistant-ui/react`;",
+        "template_literal",
+    ),
+    ClassifyCase(
+        "U14",
+        "new URL import-meta",
+        "monaco-editor",
+        "src/x.ts",
+        'new URL("monaco-editor/esm/vs/editor/editor.worker", import.meta.url);',
+        "new_url",
+    ),
+    ClassifyCase(
+        "U15",
+        "tsc triple-slash type ref",
+        "@types/some-pkg",
+        "src/x.ts",
+        '/// <reference types="@types/some-pkg" />',
+        "tsc_triple_slash",
+    ),
+    ClassifyCase(
+        "U16",
+        "HTML script src",
+        "alpinejs",
+        "index.html",
+        '<script defer src="https://unpkg.com/alpinejs@3/dist/cdn.min.js"></script>',
+        "html_script",
+    ),
+    ClassifyCase(
+        "U17",
+        "HTML link href",
+        "alpinejs",
+        "index.html",
+        '<link rel="stylesheet" href="https://unpkg.com/alpinejs@3/dist/theme.css">',
+        "html_link",
+    ),
+    ClassifyCase(
+        "U18",
+        "bare quoted string in tsconfig paths",
+        "@huggingface/hub",
+        "tsconfig.json",
+        '"paths": { "hf": ["@huggingface/hub/*"] }',
+        "string_literal",
+    ),
+    ClassifyCase(
+        "U19",
+        "vite alias key",
+        "@dagrejs/dagre",
+        "vite.config.ts",
+        '"@dagrejs/dagre": path.resolve(__dirname, "./..."),',
+        "string_literal",
+    ),
+    # False-positive guards (these should NOT detect)
+    ClassifyCase(
+        "U20",
+        "different package with shared prefix",
+        "foo",
+        "src/x.ts",
+        'import { x } from "foobar";',
+        None,
+    ),
+    ClassifyCase(
+        "U21",
+        "package mentioned in plain comment text",
+        "react",
+        "src/x.ts",
+        "// We migrated from react-router to tanstack-router",
+        None,
+    ),
+    ClassifyCase(
+        "U22",
+        "package name as a URL path tail is NOT detected "
+        "(boundary rule: pkg must be followed by quote or `/`)",
+        "react",
+        "src/x.ts",
+        'const docs = "https://example.com/react";',
+        None,
+    ),
+    ClassifyCase(
+        "U23",
+        "package name in Python file (ignored, "
+        "Python can never import npm packages)",
+        "playwright",
+        "tests/x.py",
+        'label: str = "playwright"',
+        None,
+    ),
+    ClassifyCase(
+        "U24",
+        "exact-prefix collision: pkg 'lodash' and 'lodash-es'",
+        "lodash",
+        "src/x.ts",
+        'import _ from "lodash-es";',
+        None,
+    ),
+    ClassifyCase(
+        "U25",
+        "scoped pkg substring collision",
+        "@radix-ui/react-label",
+        "src/x.ts",
+        'import x from "@radix-ui/react-label-extra";',
+        None,
+    ),
+    ClassifyCase(
+        "U26",
+        "package only mentioned in a markdown link",
+        "react",
+        "README.md",
+        "See [react](https://react.dev).",
+        None,
+    ),
+    ClassifyCase(
+        "U27",
+        "side-effect import with subpath",
+        "katex",
+        "src/x.css",
+        '@import "katex/dist/katex.min.css";',
+        "css_import",
+    ),
+    ClassifyCase(
+        "U28",
+        "require.resolve",
+        "lodash",
+        "build/x.cjs",
+        'const path = require.resolve("lodash/fp");',
+        "require",
+    ),
+    ClassifyCase(
+        "U29",
+        "TypeScript ambient `declare module`",
+        "@tanstack/react-router",
+        "src/app/router.tsx",
+        'declare module "@tanstack/react-router" {\n  interface X {}\n}',
+        "string_literal",
+    ),
+    ClassifyCase(
+        "U30",
+        "namespace import `import * as X from pkg`",
+        "@radix-ui/react-slot",
+        "src/x.tsx",
+        'import * as Slot from "@radix-ui/react-slot";',
+        "static_import",
+    ),
+    ClassifyCase(
+        "U31",
+        "combined default + named import",
+        "react",
+        "src/x.tsx",
+        'import React, { useState } from "react";',
+        "static_import",
+    ),
+    ClassifyCase(
+        "U32",
+        "default-as-named import alias",
+        "react",
+        "src/x.tsx",
+        'import { default as R } from "react";',
+        "static_import",
+    ),
+    ClassifyCase(
+        "U33",
+        "re-export default",
+        "lodash",
+        "src/x.ts",
+        'export { default } from "lodash";',
+        "re_export",
+    ),
+    ClassifyCase(
+        "U34",
+        "re-export default as alias",
+        "lodash",
+        "src/x.ts",
+        'export { default as _ } from "lodash";',
+        "re_export",
+    ),
+    ClassifyCase(
+        "U35",
+        ".then() dynamic import (no await)",
+        "@tauri-apps/api",
+        "src/x.ts",
+        'import("@tauri-apps/api/window").then(m => m.x());',
+        "dynamic_import",
+    ),
+    ClassifyCase(
+        "U36",
+        "TypeScript import() in type position",
+        "react",
+        "src/x.ts",
+        'type C = import("react").ComponentType;',
+        "dynamic_import",
+    ),
+    # File-type gating (codex P1: JS classifiers must not fire on
+    # non-script files). Python fixtures and Markdown code blocks often
+    # contain literal JS-shaped strings for documentation or test data,
+    # so a bare `import x from "pkg"` inside a .py / .md / .sh / .yml is
+    # not a real npm usage.
+    ClassifyCase(
+        "U37",
+        "JS import snippet inside a Python fixture string is NOT a usage",
+        "next-themes",
+        "tests/studio/something.py",
+        "snippet = 'import x from \"next-themes\";'",
+        None,
+    ),
+    ClassifyCase(
+        "U38",
+        "JS import snippet inside a Markdown code fence is NOT a usage",
+        "next-themes",
+        "docs/example.md",
+        '```ts\nimport x from "next-themes";\n```',
+        None,
+    ),
+    ClassifyCase(
+        "U39",
+        "JS import inside a shell script is NOT classified as a JS usage",
+        "next-themes",
+        "scripts/build.sh",
+        'echo "import x from \\"next-themes\\";"',
+        None,
+    ),
+    ClassifyCase(
+        "U40",
+        "JS import inside a YAML workflow is NOT classified as a JS usage",
+        "next-themes",
+        ".github/workflows/x.yml",
+        "run: echo 'import x from \"next-themes\";'",
+        None,
+    ),
+    # HTML script/link must respect package-name boundaries: a
+    # `/node_modules/foo-extra/...` reference does NOT use `foo`.
+    ClassifyCase(
+        "U41",
+        "HTML script src whose path segment only shares a prefix "
+        "is NOT a match",
+        "foo",
+        "index.html",
+        '<script src="/node_modules/foo-extra/dist/app.js"></script>',
+        None,
+    ),
+    ClassifyCase(
+        "U42",
+        "HTML link with similar-prefix package is NOT a match",
+        "foo",
+        "index.html",
+        '<link rel="stylesheet" href="/node_modules/foobar/styles.css">',
+        None,
+    ),
+    ClassifyCase(
+        "U43",
+        "HTML script src with the exact package segment IS a match",
+        "foo",
+        "index.html",
+        '<script src="/node_modules/foo/dist/app.js"></script>',
+        "html_script",
+    ),
+    # CSS url() unquoted variant -- valid CSS, must classify the same
+    # as the quoted variant.
+    ClassifyCase(
+        "U44",
+        "CSS url() unquoted bare package path",
+        "katex",
+        "src/x.css",
+        "src: url(katex/dist/fonts/font.woff2);",
+        "css_url",
+    ),
+    ClassifyCase(
+        "U45",
+        "CSS url() quoted bare package path still works",
+        "katex",
+        "src/x.css",
+        'src: url("katex/dist/fonts/font.woff2");',
+        "css_url",
+    ),
+]
+
+
+def run_classify_unit_tests() -> int:
+    passed = 0
+    for c in CLASSIFY_CASES:
+        actual = classify(c.pkg, c.file, c.content)
+        ok = actual == c.expected_kind
+        mark = "PASS" if ok else "FAIL"
+        print(f"  [{mark}] {c.id}: {c.desc}")
+        if not ok:
+            print(f"      pkg={c.pkg!r} file={c.file!r}")
+            print(f"      content={c.content!r}")
+            print(f"      expected={c.expected_kind!r}, actual={actual!r}")
+        if ok:
+            passed += 1
+    print()
+    print(f"{passed}/{len(CLASSIFY_CASES)} classify-unit cases pass")
+    return 0 if passed == len(CLASSIFY_CASES) else 1
+# --------------------------------------------------------------------------- + +ADVERSARIAL_TMP_DIR = REPO / "studio/frontend/src/__dep_check_adversarial__" + + +@dataclass +class AdvCase: + id: str + desc: str + filename: str + content: str + target_pkg: str + expected_status: str + expected_failures: list[str] + + +ADV_CASES: list[AdvCase] = [ + AdvCase( + "A01", + "multi-line import of removed pkg should FAIL", + "adv01.ts", + 'import {\n foo,\n bar,\n} from "__adv_only_pkg_a__";\n', + "__adv_only_pkg_a__", + "FAIL", + ["__adv_only_pkg_a__"], + ), + AdvCase( + "A02", + "export * from removed pkg should FAIL", + "adv02.ts", + 'export * from "__adv_only_pkg_b__";\n', + "__adv_only_pkg_b__", + "FAIL", + ["__adv_only_pkg_b__"], + ), + AdvCase( + "A03", + "export { x } from removed pkg should FAIL", + "adv03.ts", + 'export { foo, bar } from "__adv_only_pkg_c__";\n', + "__adv_only_pkg_c__", + "FAIL", + ["__adv_only_pkg_c__"], + ), + AdvCase( + "A04", + "export type ... from removed pkg should FAIL", + "adv04.ts", + 'export type { Foo } from "__adv_only_pkg_d__";\n', + "__adv_only_pkg_d__", + "FAIL", + ["__adv_only_pkg_d__"], + ), + AdvCase( + "A05", + "package with similar prefix should NOT trigger FAIL", + "adv05.ts", + # The file imports __adv_only_pkg_e_extra__, but we will try + # to "remove" the shorter __adv_only_pkg_e__ name. The shorter + # name has zero real usage, so removal must be safe. + 'import x from "__adv_only_pkg_e_extra__";\n', + "__adv_only_pkg_e__", + "PASS", + [], + ), + AdvCase( + "A06", + "dynamic import of removed pkg should FAIL", + "adv06.ts", + 'const m = await import("__adv_only_pkg_f__");\n', + "__adv_only_pkg_f__", + "FAIL", + ["__adv_only_pkg_f__"], + ), + AdvCase( + "A07", + "new URL of removed pkg should FAIL", + "adv07.ts", + 'const w = new URL("__adv_only_pkg_g__/worker.js", import.meta.url);\n', + "__adv_only_pkg_g__", + "FAIL", + ["__adv_only_pkg_g__"], + ), + AdvCase( + "A08", + "string-concat dynamic import is unanalyzable (PASS)", + "adv08.ts", + 'const m = await import("__adv_only_" + "pkg_h__");\n', + "__adv_only_pkg_h__", + "PASS", + [], + ), + AdvCase( + "A09", + "package referenced only inside a JS comment " + "is conservatively flagged via the string_literal fallback " + "(this is acceptable -- err on the side of caution)", + "adv09.ts", + '// TODO: import x from "__adv_only_pkg_i__"\n', + "__adv_only_pkg_i__", + "FAIL", + ["__adv_only_pkg_i__"], + ), + AdvCase( + "A10", + "package referenced only in a Python file should " "NOT trigger a JS FAIL", + "adv10.py", + 'label = "__adv_only_pkg_j__"\n', + "__adv_only_pkg_j__", + "PASS", + [], + ), + AdvCase( + "A11", + "package mentioned in a markdown doc file is " + "ignored by JS-like-only string_literal", + "adv11.md", + "See [docs](https://example.com/__adv_only_pkg_k__).\n", + "__adv_only_pkg_k__", + "PASS", + [], + ), + AdvCase( + "A12", + "JSDoc @import of removed pkg should FAIL", + "adv12.ts", + '/** @type {import("__adv_only_pkg_l__").Foo} */\n' "const x = null;\n", + "__adv_only_pkg_l__", + "FAIL", + ["__adv_only_pkg_l__"], + ), + # Prettier formats a long named-import list one identifier per line. + # 22 imports + braces puts the `import` keyword ~22 lines away from + # the `from "pkg"` clause. Before the window widening, the classify + # multi-line fallback used ±4 lines, which silently missed every + # such block. This case fails with the old window and passes once + # the window is wide enough (currently ±25). 
+ AdvCase( + "A13", + "Prettier-style 22-identifier multi-line import should FAIL " + "(exercises the widened multi-line classify window)", + "adv13.ts", + "import {\n" + + "".join(f" ident_{i:02d},\n" for i in range(22)) + + '} from "__adv_only_pkg_m__";\n', + "__adv_only_pkg_m__", + "FAIL", + ["__adv_only_pkg_m__"], + ), +] + + +# --------------------------------------------------------------------------- +# package.json field-reference cases: simulate `prettier: "@x/config"`, +# `eslintConfig.extends`, `overrides`, `peerDependenciesMeta`, etc. +# These test the package_json_extra_refs() coverage. Cross-checked against +# the patterns used by Tailwind, Stylelint, Prettier, Next.js, Astro, +# TypeScript, ESLint, SvelteKit, Storybook, Vite, and TanStack/Query +# manifests. +# --------------------------------------------------------------------------- + + +@dataclass +class PkgFieldCase: + id: str + desc: str + field_patch: dict # extra fields to merge into synth_head package.json + target_pkg: str + expected_status: str + expected_failures: list[str] + + +PKG_FIELD_CASES: list[PkgFieldCase] = [ + PkgFieldCase( + "P01", + "removing pkg referenced only in `prettier` string field", + {"prettier": "__pkg_prettier_config__"}, + "__pkg_prettier_config__", + "FAIL", + ["__pkg_prettier_config__"], + ), + PkgFieldCase( + "P02", + "removing pkg referenced in `eslintConfig.extends` array", + {"eslintConfig": {"extends": ["__pkg_eslint_cfg__"]}}, + "__pkg_eslint_cfg__", + "FAIL", + ["__pkg_eslint_cfg__"], + ), + PkgFieldCase( + "P03", + "removing pkg referenced in `stylelint.plugins`", + {"stylelint": {"plugins": ["__pkg_stylelint_plugin__"]}}, + "__pkg_stylelint_plugin__", + "FAIL", + ["__pkg_stylelint_plugin__"], + ), + PkgFieldCase( + "P04", + "removing pkg referenced in `babel.presets`", + {"babel": {"presets": [["__pkg_babel_preset__", {"opt": 1}]]}}, + "__pkg_babel_preset__", + "FAIL", + ["__pkg_babel_preset__"], + ), + PkgFieldCase( + "P05", + "removing pkg used as a key in `overrides`", + {"overrides": {"__pkg_overridden__": "^1.0.0"}}, + "__pkg_overridden__", + "FAIL", + ["__pkg_overridden__"], + ), + PkgFieldCase( + "P06", + "removing pkg used as a key in `pnpm.overrides`", + {"pnpm": {"overrides": {"__pkg_pnpm_override__": "^1.0.0"}}}, + "__pkg_pnpm_override__", + "FAIL", + ["__pkg_pnpm_override__"], + ), + PkgFieldCase( + "P07", + "removing pkg used as a key in `pnpm.patchedDependencies`", + {"pnpm": {"patchedDependencies": {"__pkg_patched__": "patches/x.patch"}}}, + "__pkg_patched__", + "FAIL", + ["__pkg_patched__"], + ), + PkgFieldCase( + "P08", + "removing pkg used as a key in `peerDependenciesMeta`", + {"peerDependenciesMeta": {"__pkg_peer_meta__": {"optional": True}}}, + "__pkg_peer_meta__", + "FAIL", + ["__pkg_peer_meta__"], + ), + PkgFieldCase( + "P09", + "removing pkg referenced in `jest.preset` string", + {"jest": {"preset": "__pkg_jest_preset__"}}, + "__pkg_jest_preset__", + "FAIL", + ["__pkg_jest_preset__"], + ), + PkgFieldCase( + "P10", + "removing pkg referenced in `commitlint.extends`", + {"commitlint": {"extends": ["__pkg_commitlint__"]}}, + "__pkg_commitlint__", + "FAIL", + ["__pkg_commitlint__"], + ), + PkgFieldCase( + "P11", + "removing pkg referenced in `renovate.extends`", + {"renovate": {"extends": ["__pkg_renovate__"]}}, + "__pkg_renovate__", + "FAIL", + ["__pkg_renovate__"], + ), + PkgFieldCase( + "P12", + "removing pkg referenced in `remarkConfig.plugins`", + {"remarkConfig": {"plugins": ["__pkg_remark__"]}}, + "__pkg_remark__", + "FAIL", + ["__pkg_remark__"], + ), 
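+    # The remaining cases pin the package-name boundary: a subpath ref
+    # like `pkg/config` still credits `pkg` (P13), a similar-prefix
+    # package must not (P14), and path-like or free-text fields such as
+    # browserslist, keywords, workspaces, files, and packageManager
+    # never count as usage (P15-P19).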
+    PkgFieldCase(
+        "P13",
+        "removing pkg with subpath ref in tool config (`pkg/config`)",
+        {"prettier": "__pkg_prettier_sub__/config"},
+        "__pkg_prettier_sub__",
+        "FAIL",
+        ["__pkg_prettier_sub__"],
+    ),
+    PkgFieldCase(
+        "P14",
+        "false-positive guard: similar-prefix package in tool config",
+        {"prettier": "__pkg_short_extra__/config"},
+        "__pkg_short__",
+        "PASS",
+        [],
+    ),
+    PkgFieldCase(
+        "P15",
+        "false-positive guard: package-named string in `browserslist` "
+        "must NOT trigger (browserslist values are browser queries, "
+        "never package names)",
+        {"browserslist": ["last 2 versions", "__pkg_browserslist__"]},
+        "__pkg_browserslist__",
+        "PASS",
+        [],
+    ),
+    PkgFieldCase(
+        "P16",
+        "false-positive guard: matching string in `keywords` field",
+        {"keywords": ["__pkg_keyword__", "foo"]},
+        "__pkg_keyword__",
+        "PASS",
+        [],
+    ),
+    PkgFieldCase(
+        "P17",
+        "false-positive guard: matching string in `workspaces` (paths)",
+        {"workspaces": ["packages/__pkg_workspace_path__"]},
+        "__pkg_workspace_path__",
+        "PASS",
+        [],
+    ),
+    PkgFieldCase(
+        "P18",
+        "false-positive guard: matching value in `files` field",
+        {"files": ["dist/__pkg_in_files__"]},
+        "__pkg_in_files__",
+        "PASS",
+        [],
+    ),
+    PkgFieldCase(
+        "P19",
+        "false-positive guard: matching `packageManager` string",
+        {"packageManager": "__pkg_in_pm__@1.0.0"},
+        "__pkg_in_pm__",
+        "PASS",
+        [],
+    ),
+]
+
+
+def run_pkg_field_cases() -> int:
+    head_pkg = json.loads(HEAD_PKG.read_text())
+    passed = 0
+    for pc in PKG_FIELD_CASES:
+        synth_head = json.loads(json.dumps(head_pkg))
+        # Apply the field patch (deep-merge isn't needed; we control the keys).
+        for k, v in pc.field_patch.items():
+            synth_head[k] = v
+        # Base has the target in dependencies; head does not. The extra field
+        # in synth_head references the target pkg even though it's no longer
+        # in deps.
+        synth_base = json.loads(json.dumps(head_pkg))
+        synth_base.setdefault("dependencies", {})[pc.target_pkg] = "^1.0.0"
+        with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+            json.dump(synth_base, f, indent = 2)
+            base_path = f.name
+        with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f:
+            json.dump(synth_head, f, indent = 2)
+            head_path = f.name
+        try:
+            proc = subprocess.run(
+                [
+                    sys.executable,
+                    str(SCRIPT),
+                    "--base-pkg",
+                    base_path,
+                    "--head-pkg",
+                    head_path,
+                    "--head-lock",
+                    str(HEAD_LOCK),
+                ],
+                capture_output = True,
+                text = True,
+                cwd = str(REPO),
+            )
+        finally:
+            os.unlink(base_path)
+            os.unlink(head_path)
+        actual_status = {0: "PASS", 1: "FAIL"}.get(
+            proc.returncode, f"RC{proc.returncode}"
+        )
+        fails: list[str] = []
+        in_summary = False
+        for line in proc.stdout.splitlines():
+            if "FAIL:" in line and "removed package" in line:
+                in_summary = True
+                continue
+            if in_summary and line.strip().startswith("- "):
+                fails.append(line.strip()[2:])
+        # Require BOTH the exit status and the exact set of reported
+        # failures to match (this also pins the false-positive guards
+        # like P15 to an empty failure list).
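+        # The summary being parsed above is assumed to look roughly like
+        # (hypothetical output; only the "FAIL:"/"removed package" header
+        # and the "- " items are relied upon):
+        #   FAIL: 1 removed package is still referenced
+        #     - __pkg_prettier_config__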
+ ok = actual_status == pc.expected_status and set(fails) == set( + pc.expected_failures + ) + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {pc.id}: {pc.desc}") + if not ok: + print( + f" expected: status={pc.expected_status} fails={pc.expected_failures}" + ) + print(f" actual: status={actual_status} fails={fails}") + for ln in proc.stdout.splitlines()[:25]: + print(f" {ln}") + if ok: + passed += 1 + print() + print(f"{passed}/{len(PKG_FIELD_CASES)} package.json-field cases pass") + return 0 if passed == len(PKG_FIELD_CASES) else 1 + + +def run_adversarial_cases() -> int: + ADVERSARIAL_TMP_DIR.mkdir(parents = True, exist_ok = True) + head_pkg = json.loads(HEAD_PKG.read_text()) + passed = 0 + for ac in ADV_CASES: + # Drop the synthetic file. + fpath = ADVERSARIAL_TMP_DIR / ac.filename + try: + fpath.write_text(ac.content) + # Build a synthetic base that has the target pkg added; head + # is the real head (without it). The script sees the pkg as + # removed and scans the repo, which now includes our file. + synth_base = json.loads(json.dumps(head_pkg)) + synth_base.setdefault("dependencies", {})[ac.target_pkg] = "^1.0.0" + with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f: + json.dump(synth_base, f, indent = 2) + base_path = f.name + try: + proc = subprocess.run( + [ + sys.executable, + str(SCRIPT), + "--base-pkg", + base_path, + "--head-pkg", + str(HEAD_PKG), + "--head-lock", + str(HEAD_LOCK), + ], + capture_output = True, + text = True, + cwd = str(REPO), + ) + finally: + os.unlink(base_path) + actual_status = {0: "PASS", 1: "FAIL"}.get( + proc.returncode, f"RC{proc.returncode}" + ) + fails = [] + in_summary = False + for line in proc.stdout.splitlines(): + if "FAIL:" in line and "removed package" in line: + in_summary = True + continue + if in_summary and line.strip().startswith("- "): + fails.append(line.strip()[2:]) + ok = actual_status == ac.expected_status and set(fails) == set( + ac.expected_failures + ) + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {ac.id}: {ac.desc}") + if not ok: + print( + f" expected: status={ac.expected_status} fails={ac.expected_failures}" + ) + print(f" actual: status={actual_status} fails={fails}") + for ln in proc.stdout.splitlines()[:20]: + print(f" {ln}") + if ok: + passed += 1 + finally: + try: + fpath.unlink() + except FileNotFoundError: + pass + # Clean up the directory. + try: + ADVERSARIAL_TMP_DIR.rmdir() + except OSError: + pass + print() + print(f"{passed}/{len(ADV_CASES)} adversarial cases pass") + return 0 if passed == len(ADV_CASES) else 1 + + +# --------------------------------------------------------------------------- +# Dead-dep enumeration cases. 
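+# Assertions are superset-based: expected_unused must be a subset of the
+# reported unused set, expected_used must stay out of it, and expected
+# orphans must appear -- so unrelated findings from the real repo cannot
+# flake these cases.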
+# --------------------------------------------------------------------------- + + +@dataclass +class EnumCase: + id: str + desc: str + add_deps: dict[str, str] + add_dev_deps: dict[str, str] + field_patch: dict + extra_file: tuple[str, str] | None # (relative_path, content) or None + expected_unused: set[str] + expected_used: set[str] + expected_orphan_types: set[str] + + +ENUM_CASES: list[EnumCase] = [ + EnumCase( + "E01", + "fake dep with no usage anywhere is flagged unused", + {"__enum_fake_unused_pkg__": "^1.0.0"}, + {}, + {}, + None, + {"__enum_fake_unused_pkg__"}, + set(), + set(), + ), + EnumCase( + "E02", + "fake dep referenced via vite.config-style import is flagged used " + "(uses a real adversarial file as the import site)", + {"__enum_used_via_src__": "^1.0.0"}, + {}, + {}, + ( + "src/__dep_check_adversarial__/enum_e02.ts", + 'import x from "__enum_used_via_src__";\n', + ), + set(), + {"__enum_used_via_src__"}, + set(), + ), + EnumCase( + "E03", + "fake dep referenced only in package.json `overrides` is flagged used", + {"__enum_used_via_overrides__": "^1.0.0"}, + {}, + {"overrides": {"__enum_used_via_overrides__": "^1.0.0"}}, + None, + set(), + {"__enum_used_via_overrides__"}, + set(), + ), + EnumCase( + "E04", + "@types/X where X is declared -> kept (NOT orphan)", + {"__enum_real_pkg__": "^1.0.0"}, + {"@types/__enum_real_pkg__": "^1.0.0"}, + {}, + ( + "src/__dep_check_adversarial__/enum_e04.ts", + 'import x from "__enum_real_pkg__";\n', + ), + set(), + {"__enum_real_pkg__"}, + set(), + ), + EnumCase( + "E05", + "@types/X where X is NOT declared anywhere -> orphan", + {}, + {"@types/__enum_orphan_pkg__": "^1.0.0"}, + {}, + None, + set(), + set(), + {"@types/__enum_orphan_pkg__"}, + ), +] + + +def run_enum_cases() -> int: + head_pkg = json.loads(HEAD_PKG.read_text()) + passed = 0 + ADVERSARIAL_TMP_DIR.mkdir(parents = True, exist_ok = True) + for ec in ENUM_CASES: + synth_head = json.loads(json.dumps(head_pkg)) + synth_head.setdefault("dependencies", {}).update(ec.add_deps) + synth_head.setdefault("devDependencies", {}).update(ec.add_dev_deps) + for k, v in ec.field_patch.items(): + synth_head[k] = v + # Drop any temp source file if needed. + fpath = None + if ec.extra_file: + rel, content = ec.extra_file + fpath = REPO / rel + fpath.parent.mkdir(parents = True, exist_ok = True) + fpath.write_text(content) + with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f: + json.dump(synth_head, f, indent = 2) + head_path = f.name + try: + proc = subprocess.run( + [ + sys.executable, + str(SCRIPT), + "--base-pkg", + str(HEAD_PKG), + "--head-pkg", + head_path, + "--head-lock", + str(HEAD_LOCK), + "--enumerate-dead", + ], + capture_output = True, + text = True, + cwd = str(REPO), + ) + finally: + os.unlink(head_path) + if fpath: + try: + fpath.unlink() + except FileNotFoundError: + pass + # Parse the dead-dep enumeration output. 
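+        # Assumed report shape (hypothetical example; only the section
+        # headers and "- " items are relied upon below):
+        #   unused (1):
+        #     - __enum_fake_unused_pkg__
+        #   used: ...
+        #   type_pkg_orphan (1):
+        #     - @types/__enum_orphan_pkg__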
+ unused: set[str] = set() + orphans: set[str] = set() + in_unused = False + in_orphan = False + for line in proc.stdout.splitlines(): + s = line.strip() + if s.startswith("unused ("): + in_unused = True + in_orphan = False + continue + if s.startswith("type_pkg_orphan ("): + in_unused = False + in_orphan = True + continue + if s.startswith("used:") or s.startswith("type_pkg_kept:"): + in_unused = in_orphan = False + continue + if s.startswith("- "): + if in_unused: + unused.add(s[2:]) + elif in_orphan: + orphans.add(s[2:]) + unused_ok = ec.expected_unused.issubset(unused) and ( + not ec.expected_used or not (ec.expected_used & unused) + ) + orphan_ok = ec.expected_orphan_types.issubset(orphans) + ok = unused_ok and orphan_ok + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {ec.id}: {ec.desc}") + if not ok: + print(f" expected unused superset: {sorted(ec.expected_unused)}") + print(f" expected used NOT in unused: {sorted(ec.expected_used)}") + print( + f" expected orphans superset: {sorted(ec.expected_orphan_types)}" + ) + print(f" actual unused: {sorted(unused)}") + print(f" actual orphans: {sorted(orphans)}") + for ln in proc.stdout.splitlines()[:30]: + print(f" {ln}") + if ok: + passed += 1 + # Cleanup tmp dir if empty. + try: + ADVERSARIAL_TMP_DIR.rmdir() + except OSError: + pass + print() + print(f"{passed}/{len(ENUM_CASES)} enumeration cases pass") + return 0 if passed == len(ENUM_CASES) else 1 + + +# --------------------------------------------------------------------------- +# Script-wrapper cases: exercise scripts_bin_refs / _next_real_bin so a +# package.json script like `cross-env CI=1 biome check` correctly credits +# `@biomejs/biome` rather than the wrapper itself. The 10x reviewer flagged +# the original "first non-env token" heuristic as too narrow: any project +# using cross-env / dotenv / dotenvx / env-cmd / a quoted env value would +# bypass the bin-name check. +# --------------------------------------------------------------------------- + + +@dataclass +class WrapperCase: + id: str + desc: str + raw_cmd: str + expected_bin: str | None # None means "no real bin (e.g. 
unwrappable)" + + +WRAPPER_CASES: list[WrapperCase] = [ + WrapperCase( + "W01", + "cross-env wraps the real bin", + "cross-env CI=1 biome check .", + "biome", + ), + WrapperCase( + "W02", + "cross-env with multiple env tokens after the wrapper", + "cross-env A=1 B=2 NODE_ENV=prod biome check", + "biome", + ), + WrapperCase( + "W03", + "bare env-prefix run (no wrapper) still peels the env tokens", + "FOO=bar biome check", + "biome", + ), + WrapperCase( + "W04", + "quoted env value with spaces (shlex preserves it as one word)", + 'FOO="a b" biome check', + "biome", + ), + WrapperCase( + "W05", + "npx + cross-env: runner peels, wrapper peels, real bin wins", + "npx cross-env CI=1 biome check", + "biome", + ), + WrapperCase( + "W06", + "pnpm exec + cross-env", + "pnpm exec cross-env CI=1 biome check", + "biome", + ), + WrapperCase( + "W07", + "dotenv with the `--` separator before the wrapped command", + "dotenv -- biome check", + "biome", + ), + WrapperCase( + "W08", + "dotenv with a flag-arg pair and `--` separator", + "dotenv -e .env -- biome check", + "biome", + ), + WrapperCase( + "W09", + "leading `./node_modules/.bin/` prefix is stripped", + "./node_modules/.bin/biome check", + "biome", + ), + WrapperCase( + "W10", + "concurrently is NOT a script wrapper -- it dispatches by " + "script *name*, not bin, so the real bin is `concurrently` " + "itself (the wrapped script names are credited by their own " + "scripts entries, which scripts_bin_refs iterates separately)", + 'concurrently "npm:dev" "npm:typecheck"', + "concurrently", + ), +] + + +def run_wrapper_cases() -> int: + import shlex + + passed = 0 + for wc in WRAPPER_CASES: + try: + words = shlex.split(wc.raw_cmd, posix = True) + except ValueError: + words = wc.raw_cmd.split() + actual = _next_real_bin(words, 0) + ok = actual == wc.expected_bin + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {wc.id}: {wc.desc}") + if not ok: + print(f" raw_cmd={wc.raw_cmd!r}") + print(f" expected={wc.expected_bin!r}, actual={actual!r}") + if ok: + passed += 1 + + # End-to-end integration: feed scripts_bin_refs a synthetic head_pkg + # whose scripts use a wrapper, and confirm the package owning the + # wrapped bin is credited (rather than the wrapper). This is the + # actual call path used by find_command_usage(). 
+    int_total = 0
+    int_passed = 0
+    int_cases = [
+        (
+            "I01",
+            "cross-env wrapping `biome` credits @biomejs/biome",
+            {"lint": "cross-env CI=1 biome check"},
+            {"biome": "@biomejs/biome"},
+            "@biomejs/biome",
+        ),
+        (
+            "I02",
+            "dotenv -- biome credits @biomejs/biome",
+            {"lint": "dotenv -- biome check"},
+            {"biome": "@biomejs/biome"},
+            "@biomejs/biome",
+        ),
+        (
+            "I03",
+            "quoted env value before bin still credits the bin's owner",
+            {"lint": 'FOO="a b" biome check .'},
+            {"biome": "@biomejs/biome"},
+            "@biomejs/biome",
+        ),
+        (
+            "I04",
+            "&& chain: both halves credit their owning packages",
+            {"build": "tsc -b && cross-env CI=1 biome check"},
+            {"tsc": "typescript", "biome": "@biomejs/biome"},
+            None,  # asserted via the `owners` set below
+        ),
+    ]
+    for case_id, desc, scripts, bin_to_pkg, expect_owner in int_cases:
+        int_total += 1
+        refs = scripts_bin_refs({"scripts": scripts}, bin_to_pkg)
+        if case_id == "I04":
+            owners = set(refs.keys())
+            ok = owners == {"typescript", "@biomejs/biome"}
+        else:
+            ok = expect_owner in refs
+        mark = "PASS" if ok else "FAIL"
+        print(f"  [{mark}] {case_id}: {desc}")
+        if not ok:
+            print(f"    scripts={scripts!r} bin_to_pkg={bin_to_pkg!r}")
+            print(f"    refs={refs!r}")
+        if ok:
+            int_passed += 1
+
+    total = len(WRAPPER_CASES) + int_total
+    print()
+    print(f"{passed + int_passed}/{total} wrapper-script cases pass")
+    return 0 if (passed == len(WRAPPER_CASES) and int_passed == int_total) else 1
+
+
+def main() -> int:
+    head_pkg = json.loads(HEAD_PKG.read_text())
+    print(f"Running {len(CASES)} edge cases against {SCRIPT.relative_to(REPO)}")
+    print()
+    results: list[tuple[Case, bool, str]] = []
+    for c in CASES:
+        ok, detail = run_case(c, head_pkg)
+        results.append((c, ok, detail))
+        mark = "PASS" if ok else "FAIL"
+        print(f"  [{mark}] {c.id}: {c.desc}")
+        if not ok:
+            for line in detail.splitlines():
+                print(f"    {line}")
+    print()
+    passed = sum(1 for _, ok, _ in results if ok)
+    total = len(results)
+    print(f"{passed}/{total} edge cases pass")
+
+    print()
+    print(f"Running {len(CLASSIFY_CASES)} classify() unit cases")
+    print()
+    cls_rc = run_classify_unit_tests()
+
+    print()
+    print(f"Running {len(ADV_CASES)} adversarial end-to-end cases")
+    print()
+    adv_rc = run_adversarial_cases()
+
+    print()
+    print(f"Running {len(PKG_FIELD_CASES)} package.json-field cases")
+    print()
+    pkg_rc = run_pkg_field_cases()
+
+    print()
+    print(f"Running {len(ENUM_CASES)} dead-dep enumeration cases")
+    print()
+    enum_rc = run_enum_cases()
+
+    print()
+    print(
+        f"Running {len(WRAPPER_CASES)} script-wrapper cases "
+        "(_next_real_bin + scripts_bin_refs end-to-end)"
+    )
+    print()
+    wrap_rc = run_wrapper_cases()
+
+    if (
+        passed == total
+        and cls_rc == 0
+        and adv_rc == 0
+        and pkg_rc == 0
+        and enum_rc == 0
+        and wrap_rc == 0
+    ):
+        return 0
+    return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())