diff --git a/scripts/docs-link-audit.d.mts b/scripts/docs-link-audit.d.mts
new file mode 100644
index 00000000000..7cc2d22f53a
--- /dev/null
+++ b/scripts/docs-link-audit.d.mts
@@ -0,0 +1,37 @@
+/** One unresolved link reported by `auditDocsLinks`. */
+export type BrokenDocLink = {
+  /** Docs-relative path of the file containing the link, or "docs.json" for navigation pages. */
+  file: string;
+  /** 1-based line of the link; 0 for navigation pages. */
+  line: number;
+  /** Link target as captured from the source. */
+  link: string;
+  /** Human-readable explanation of the failure. */
+  reason: string;
+};
+
+/** Result of following a route through the redirect map. */
+export type ResolveRouteResult = {
+  /** Whether the terminal route is in the route set; the root route "/" is always ok. */
+  ok: boolean;
+  /** Normalized route at which resolution stopped. */
+  terminal: string;
+  /** Present (as `true`) only when a redirect led back to an already visited route. */
+  loop?: boolean;
+};
+
+/** Strips "#fragment", "?query" and surrounding slashes, returning "/" plus what remains. */
+export function normalizeRoute(route: string): string;
+/**
+ * Follows redirects starting at `route`. `options` replaces the module-level
+ * redirect map and route set.
+ */
+export function resolveRoute(
+  route: string,
+  options?: { redirects?: Map<string, string>; routes?: Set<string> },
+): ResolveRouteResult;
+/** Audits markdown links and navigation pages; see `BrokenDocLink` for the failure shape. */
+export function auditDocsLinks(): {
+  checked: number;
+  broken: BrokenDocLink[];
+};
diff --git a/scripts/docs-link-audit.mjs b/scripts/docs-link-audit.mjs
index 2419fc2d8f5..e60bff8f3ee 100644
--- a/scripts/docs-link-audit.mjs
+++ b/scripts/docs-link-audit.mjs
@@ -2,6 +2,7 @@
 
 import fs from "node:fs";
 import path from "node:path";
+import { pathToFileURL } from "node:url";
 
 const ROOT = process.cwd();
 const DOCS_DIR = path.join(ROOT, "docs");
@@ -42,8 +43,11 @@ function normalizeSlashes(p) {
 }
 
 /** @param {string} p */
-function normalizeRoute(p) {
-  const stripped = p.replace(/^\/+|\/+$/g, "");
+export function normalizeRoute(p) {
+  // Keep only the path: drop any "#fragment" first, then any "?query".
+  const [withoutFragment] = p.split("#");
+  const [withoutQuery] = withoutFragment.split("?");
+  const stripped = withoutQuery.replace(/^\/+|\/+$/g, "");
   return stripped ? `/${stripped}` : "/";
 }
 
@@ -105,22 +109,35 @@ for (const abs of markdownFiles) {
   routes.add(normalizeRoute(permalink));
 }
 
-/** @param {string} route */
-function resolveRoute(route) {
+/**
+ * Follows redirects from `route` until a route without a redirect entry is reached.
+ * Each redirect target is normalized, so a target carrying a "#fragment" or "?query"
+ * is matched by its path alone.
+ *
+ * @param {string} route
+ * @param {{redirects?: Map<string, string>, routes?: Set<string>}} [options]
+ *   Replacements for the module-level redirect map and route set.
+ * @returns {{ok: boolean, terminal: string, loop?: boolean}}
+ *   `ok` is whether the terminal route is in the route set ("/" is always ok);
+ *   `loop: true` accompanies `ok: false` when a redirect revisits a route.
+ */
+export function resolveRoute(route, options = {}) {
+  const redirectMap = options.redirects ?? redirects;
+  const publishedRoutes = options.routes ?? routes;
   let current = normalizeRoute(route);
   if (current === "/") {
     return { ok: true, terminal: "/" };
   }
 
   const seen = new Set([current]);
-  while (redirects.has(current)) {
-    current = redirects.get(current);
+  while (redirectMap.has(current)) {
+    current = normalizeRoute(redirectMap.get(current));
     if (seen.has(current)) {
       return { ok: false, terminal: current, loop: true };
     }
     seen.add(current);
   }
-  return { ok: routes.has(current), terminal: current };
+  return { ok: publishedRoutes.has(current), terminal: current };
 }
 
 /** @param {unknown} node */
@@ -160,138 +177,165 @@ function collectNavPageEntries(node) {
 
 const markdownLinkRegex = /!?\[[^\]]*\]\(([^)]+)\)/g;
 
-/** @type {{file: string; line: number; link: string; reason: string}[]} */
-const broken = [];
-let checked = 0;
+/**
+ * Audits internal links in every collected markdown file, then every page entry in
+ * the docs config navigation (reported under "docs.json").
+ *
+ * Lines inside fenced code blocks and text inside inline code are not scanned.
+ *
+ * @returns {{
+ *   checked: number,
+ *   broken: {file: string, line: number, link: string, reason: string}[],
+ * }} How many links and navigation pages were examined, and which of them failed.
+ */
+export function auditDocsLinks() {
+  /** @type {{file: string; line: number; link: string; reason: string}[]} */
+  const broken = [];
+  let checked = 0;
 
-for (const abs of markdownFiles) {
-  const rel = normalizeSlashes(path.relative(DOCS_DIR, abs));
-  const baseDir = normalizeSlashes(path.dirname(rel));
-  const rawText = fs.readFileSync(abs, "utf8");
-  const lines = rawText.split("\n");
+  for (const abs of markdownFiles) {
+    const rel = normalizeSlashes(path.relative(DOCS_DIR, abs));
+    const baseDir = normalizeSlashes(path.dirname(rel));
+    const rawText = fs.readFileSync(abs, "utf8");
+    const lines = rawText.split("\n");
 
-  // Track if we're inside a code fence
-  let inCodeFence = false;
+    // Track if we're inside a code fence
+    let inCodeFence = false;
 
-  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
-    let line = lines[lineNum];
+    for (let lineNum = 0; lineNum < lines.length; lineNum++) {
+      let line = lines[lineNum];
 
-    // Toggle code fence state
-    if (line.trim().startsWith("```")) {
-      inCodeFence = !inCodeFence;
-      continue;
-    }
-    if (inCodeFence) {
-      continue;
-    }
-
-    // Strip inline code to avoid false positives
-    line = stripInlineCode(line);
-
-    for (const match of line.matchAll(markdownLinkRegex)) {
-      const raw = match[1]?.trim();
-      if (!raw) {
+      // Toggle code fence state
+      if (line.trim().startsWith("```")) {
+        inCodeFence = !inCodeFence;
         continue;
       }
-      // Skip external links, mailto, tel, data, and same-page anchors
-      if (/^(https?:|mailto:|tel:|data:|#)/i.test(raw)) {
+      if (inCodeFence) {
         continue;
       }
 
-      const [pathPart] = raw.split("#");
-      const clean = pathPart.split("?")[0];
-      if (!clean) {
-        // Same-page anchor only (already skipped above)
-        continue;
-      }
-      checked++;
+      // Strip inline code to avoid false positives
+      line = stripInlineCode(line);
 
-      if (clean.startsWith("/")) {
-        const route = normalizeRoute(clean);
-        const resolvedRoute = resolveRoute(route);
-        if (!resolvedRoute.ok) {
-          const staticRel = route.replace(/^\//, "");
-          if (!relAllFiles.has(staticRel)) {
+      for (const match of line.matchAll(markdownLinkRegex)) {
+        const raw = match[1]?.trim();
+        if (!raw) {
+          continue;
+        }
+        // Skip external links, mailto, tel, data, and same-page anchors
+        if (/^(https?:|mailto:|tel:|data:|#)/i.test(raw)) {
+          continue;
+        }
+
+        const [pathPart] = raw.split("#");
+        const clean = pathPart.split("?")[0];
+        if (!clean) {
+          // Same-page anchor only (already skipped above)
+          continue;
+        }
+        checked++;
+
+        if (clean.startsWith("/")) {
+          const route = normalizeRoute(clean);
+          const resolvedRoute = resolveRoute(route);
+          if (!resolvedRoute.ok) {
+            const staticRel = route.replace(/^\//, "");
+            if (!relAllFiles.has(staticRel)) {
+              broken.push({
+                file: rel,
+                line: lineNum + 1,
+                link: raw,
+                reason: `route/file not found (terminal: ${resolvedRoute.terminal})`,
+              });
+              continue;
+            }
+          }
+          // Skip anchor validation - Mintlify generates anchors from MDX components,
+          // accordions, and config schemas that we can't reliably extract from markdown.
+          continue;
+        }
+
+        // Relative placeholder strings used in code examples (for example "url")
+        // are intentionally skipped.
+        if (!clean.startsWith(".") && !clean.includes("/")) {
+          continue;
+        }
+
+        const normalizedRel = normalizeSlashes(path.normalize(path.join(baseDir, clean)));
+
+        if (/\.[a-zA-Z0-9]+$/.test(normalizedRel)) {
+          if (!relAllFiles.has(normalizedRel)) {
             broken.push({
               file: rel,
               line: lineNum + 1,
               link: raw,
-              reason: `route/file not found (terminal: ${resolvedRoute.terminal})`,
+              reason: "relative file not found",
             });
-            continue;
           }
+          continue;
         }
-        // Skip anchor validation - Mintlify generates anchors from MDX components,
-        // accordions, and config schemas that we can't reliably extract from markdown.
-        continue;
-      }
 
-      // Relative placeholder strings used in code examples (for example "url")
-      // are intentionally skipped.
-      if (!clean.startsWith(".") && !clean.includes("/")) {
-        continue;
-      }
+        const candidates = [
+          normalizedRel,
+          `${normalizedRel}.md`,
+          `${normalizedRel}.mdx`,
+          `${normalizedRel}/index.md`,
+          `${normalizedRel}/index.mdx`,
+        ];
 
-      const normalizedRel = normalizeSlashes(path.normalize(path.join(baseDir, clean)));
-
-      if (/\.[a-zA-Z0-9]+$/.test(normalizedRel)) {
-        if (!relAllFiles.has(normalizedRel)) {
+        if (!candidates.some((candidate) => relAllFiles.has(candidate))) {
           broken.push({
             file: rel,
             line: lineNum + 1,
             link: raw,
-            reason: "relative file not found",
+            reason: "relative doc target not found",
           });
         }
-        continue;
-      }
-
-      const candidates = [
-        normalizedRel,
-        `${normalizedRel}.md`,
-        `${normalizedRel}.mdx`,
-        `${normalizedRel}/index.md`,
-        `${normalizedRel}/index.mdx`,
-      ];
-
-      if (!candidates.some((candidate) => relAllFiles.has(candidate))) {
-        broken.push({
-          file: rel,
-          line: lineNum + 1,
-          link: raw,
-          reason: "relative doc target not found",
-        });
       }
     }
   }
-}
 
-for (const page of collectNavPageEntries(docsConfig.navigation || [])) {
-  if (isGeneratedTranslatedDoc(String(page))) {
-    continue;
-  }
-  checked++;
-  const route = normalizeRoute(page);
-  const resolvedRoute = resolveRoute(route);
-  if (resolvedRoute.ok) {
-    continue;
+  for (const page of collectNavPageEntries(docsConfig.navigation || [])) {
+    if (isGeneratedTranslatedDoc(String(page))) {
+      continue;
+    }
+    checked++;
+    const route = normalizeRoute(page);
+    const resolvedRoute = resolveRoute(route);
+    if (resolvedRoute.ok) {
+      continue;
+    }
+
+    broken.push({
+      file: "docs.json",
+      line: 0,
+      link: page,
+      reason: `navigation page not published (terminal: ${resolvedRoute.terminal})`,
+    });
   }
 
-  broken.push({
-    file: "docs.json",
-    line: 0,
-    link: page,
-    reason: `navigation page not published (terminal: ${resolvedRoute.terminal})`,
-  });
+  return { checked, broken };
 }
 
-console.log(`checked_internal_links=${checked}`);
-console.log(`broken_links=${broken.length}`);
-
-for (const item of broken) {
-  console.log(`${item.file}:${item.line} :: ${item.link} :: ${item.reason}`);
+/**
+ * Reports whether this module is the script Node was launched with, so importing it
+ * (for example from tests) does not print the report or exit the process.
+ */
+function isCliEntry() {
+  const cliArg = process.argv[1];
+  return cliArg ? import.meta.url === pathToFileURL(cliArg).href : false;
 }
 
-if (broken.length > 0) {
-  process.exit(1);
+if (isCliEntry()) {
+  const { checked, broken } = auditDocsLinks();
+  console.log(`checked_internal_links=${checked}`);
+  console.log(`broken_links=${broken.length}`);
+
+  for (const item of broken) {
+    console.log(`${item.file}:${item.line} :: ${item.link} :: ${item.reason}`);
+  }
+
+  if (broken.length > 0) {
+    process.exit(1);
+  }
 }
diff --git a/src/scripts/docs-link-audit.test.ts b/src/scripts/docs-link-audit.test.ts
new file mode 100644
index 00000000000..34b9958d3f4
--- /dev/null
+++ b/src/scripts/docs-link-audit.test.ts
@@ -0,0 +1,44 @@
+import { describe, expect, it } from "vitest";
+
+const { normalizeRoute, resolveRoute } =
+  (await import("../../scripts/docs-link-audit.mjs")) as unknown as {
+    normalizeRoute: (route: string) => string;
+    resolveRoute: (
+      route: string,
+      options?: { redirects?: Map<string, string>; routes?: Set<string> },
+    ) => { ok: boolean; terminal: string; loop?: boolean };
+  };
+
+describe("docs-link-audit", () => {
+  it("normalizes route fragments away", () => {
+    expect(normalizeRoute("/plugins/building-plugins#registering-agent-tools")).toBe(
+      "/plugins/building-plugins",
+    );
+    expect(normalizeRoute("/plugins/building-plugins?tab=all")).toBe("/plugins/building-plugins");
+  });
+
+  it("resolves redirects that land on anchored sections", () => {
+    const redirects = new Map([
+      ["/plugins/agent-tools", "/plugins/building-plugins#registering-agent-tools"],
+    ]);
+    const routes = new Set(["/plugins/building-plugins"]);
+
+    expect(resolveRoute("/plugins/agent-tools", { redirects, routes })).toEqual({
+      ok: true,
+      terminal: "/plugins/building-plugins",
+    });
+  });
+
+  it("reports redirect loops instead of following them forever", () => {
+    const redirects = new Map([
+      ["/a", "/b"],
+      ["/b", "/a"],
+    ]);
+
+    expect(resolveRoute("/a", { redirects, routes: new Set<string>() })).toEqual({
+      ok: false,
+      terminal: "/a",
+      loop: true,
+    });
+  });
+});