Generator

Multi-page Broken Link Crawler

A site-wide broken-link crawl requires recursive fetching on the server. Use the existing scanner to check a single page, or run the Node snippet below to crawl multiple pages.

For a single page

The existing Broken Links tool checks up to 20 links per page. For a multi-page crawl, run the snippet below on a server.

Multi-page crawler skeleton (Node)
import { safeFetch } from "@/lib/scanner/proxy";
import * as cheerio from "cheerio";

/**
 * Crawls pages reachable from `startUrl` and reports links that did not resolve.
 *
 * Every `<a href>` on a crawled page is probed with a HEAD request. Links whose
 * origin equals the start URL's origin are then crawled recursively, up to
 * `maxDepth` levels below the start page.
 *
 * Assumes `safeFetch` is fetch-like: it resolves to an object exposing `ok`,
 * `status` and `text()`, and may reject on network failure or timeout.
 *
 * @param startUrl Absolute URL of the page to start from. Its fragment is
 *   ignored, and the URL is normalized before being used in `from`.
 * @param maxDepth Deepest page level whose links are checked; the start page
 *   is depth 0.
 * @returns One entry per broken (page, link) pair. `status` is the HTTP status
 *   of the failed probe, or `0` when no response was obtained at all (network
 *   error, timeout, or the fetch helper rejecting the request).
 * @throws TypeError if `startUrl` is not a valid absolute URL.
 */
export async function crawl(
  startUrl: string,
  maxDepth = 2,
): Promise<{ from: string; to: string; status: number }[]> {
  const start = new URL(startUrl);
  // Fragments never reach the server, so they must not make two URLs differ.
  start.hash = "";
  const origin = start.origin;

  const visited = new Set<string>();
  // Probe results are cached so a link shared by many pages (nav, footer) is
  // requested only once per crawl.
  const probed = new Map<string, { ok: boolean; status: number }>();
  const broken: { from: string; to: string; status: number }[] = [];

  /** Resolves an href against its page; returns null for unusable links. */
  function resolveLink(href: string, base: string): string | null {
    let target: URL;
    try {
      target = new URL(href, base);
    } catch {
      // A malformed href must not abort the whole crawl.
      return null;
    }
    // mailto:, tel:, javascript:, data: etc. cannot be checked over HTTP.
    if (target.protocol !== "http:" && target.protocol !== "https:") return null;
    target.hash = "";
    return target.href;
  }

  /** Probes a URL once and reports whether it resolved. */
  async function probe(url: string): Promise<{ ok: boolean; status: number }> {
    const cached = probed.get(url);
    if (cached !== undefined) return cached;

    let result = { ok: false, status: 0 };
    try {
      let res = await safeFetch(url, { method: "HEAD", signal: AbortSignal.timeout(5000) });
      // Some servers do not implement HEAD; confirm with GET before reporting.
      if (res.status === 405 || res.status === 501) {
        res = await safeFetch(url, { method: "GET", signal: AbortSignal.timeout(5000) });
      }
      result = { ok: res.ok, status: res.status };
    } catch {
      // No response at all: keep status 0 so the link is still reported.
    }
    probed.set(url, result);
    return result;
  }

  async function check(url: string, depth: number): Promise<void> {
    if (visited.has(url) || depth > maxDepth) return;
    visited.add(url);

    let html: string;
    try {
      const res = await safeFetch(url, { signal: AbortSignal.timeout(8000) });
      if (!res.ok) return;
      html = await res.text();
    } catch {
      // The page could not be downloaded, so there is nothing to scan. Pages
      // reached through a link were already probed (and reported) by the caller.
      return;
    }

    const $ = cheerio.load(html);
    const hrefs: string[] = $("a[href]")
      .map((_, a) => $(a).attr("href"))
      .get();

    // De-duplicate per page so a repeated link is reported once per source page.
    const targets = new Set<string>();
    for (const href of hrefs) {
      const abs = resolveLink(href, url);
      if (abs !== null) targets.add(abs);
    }

    for (const abs of targets) {
      const { ok, status } = await probe(abs);
      if (!ok) {
        broken.push({ from: url, to: abs, status });
        continue; // nothing to crawl behind a broken link
      }
      // Compare parsed origins: a string prefix test would also accept
      // look-alike hosts such as "https://example.com.evil.test".
      if (new URL(abs).origin === origin) await check(abs, depth + 1);
    }
  }

  await check(start.href, 0);
  return broken;
}