fix(core): broken links optimization behaves differently than non-optimized logic (#9791)

2025-08-04 09:19:16 +02:00 · 2024-01-25 19:49:45 +01:00 · 2024-01-25 19:49:45 +01:00 · d3142c5ed5
commit d3142c5ed5
parent 2f2ed41829
3 changed files with 168 additions and 10 deletions
--- a/packages/docusaurus-types/src/routing.d.ts
+++ b/packages/docusaurus-types/src/routing.d.ts
@ -60,6 +60,11 @@ export type RouteConfig = {
  routes?: RouteConfig[];
  /** React router config option: `exact` routes would not match subroutes. */
  exact?: boolean;
+  /**
+   * React router config option: `strict` routes are sensitive to the presence
+   * of a trailing slash.
+   */
+  strict?: boolean;
  /** Used to sort routes. Higher-priority routes will be placed first. */
  priority?: number;
  /** Extra props; will be copied to routes.js. */
--- a/packages/docusaurus/src/server/tests/brokenLinks.test.ts
+++ b/packages/docusaurus/src/server/tests/brokenLinks.test.ts
@ -13,7 +13,12 @@ import type {RouteConfig} from '@docusaurus/types';
 type Params = Parameters<typeof handleBrokenLinks>[0];

 // We don't need all the routes attributes for our tests
-type SimpleRoute = {path: string; routes?: SimpleRoute[]};
+type SimpleRoute = {
+  path: string;
+  routes?: SimpleRoute[];
+  exact?: boolean;
+  strict?: boolean;
+};

 // Conveniently apply defaults to function under test
 async function testBrokenLinks(params: {
@ -43,6 +48,52 @@ describe('handleBrokenLinks', () => {
    });
  });

+  it('accepts valid non-exact link', async () => {
+    await testBrokenLinks({
+      routes: [{path: '/page1', exact: false}, {path: '/page2/'}],
+      collectedLinks: {
+        '/page1': {
+          links: [
+            '/page1',
+            '/page1/',
+            '/page2',
+            '/page2/',
+            '/page1/subPath',
+            '/page2/subPath',
+          ],
+          anchors: [],
+        },
+        '/page2/': {
+          links: [
+            '/page1',
+            '/page1/',
+            '/page2',
+            '/page2/',
+            '/page1/subPath',
+            '/page2/subPath',
+          ],
+          anchors: [],
+        },
+      },
+    });
+  });
+
+  it('accepts valid non-strict link', async () => {
+    await testBrokenLinks({
+      routes: [{path: '/page1', strict: false}, {path: '/page2/'}],
+      collectedLinks: {
+        '/page1': {
+          links: ['/page1', '/page1/', '/page2', '/page2/'],
+          anchors: [],
+        },
+        '/page2/': {
+          links: ['/page1', '/page1/', '/page2', '/page2/'],
+          anchors: [],
+        },
+      },
+    });
+  });
+
  it('accepts valid link to uncollected page', async () => {
    await testBrokenLinks({
      routes: [{path: '/page1'}, {path: '/page2'}],
@ -292,6 +343,76 @@ describe('handleBrokenLinks', () => {
    `);
  });

+  it('rejects broken link due to strict matching', async () => {
+    await expect(() =>
+      testBrokenLinks({
+        routes: [
+          {path: '/page1', strict: true},
+          {path: '/page2/', strict: true},
+        ],
+
+        collectedLinks: {
+          '/page1': {
+            links: ['/page1', '/page1/', '/page2', '/page2/'],
+            anchors: [],
+          },
+          '/page2/': {
+            links: ['/page1', '/page1/', '/page2', '/page2/'],
+            anchors: [],
+          },
+        },
+      }),
+    ).rejects.toThrowErrorMatchingInlineSnapshot(`
+      "Docusaurus found broken links!
+
+      Please check the pages of your site in the list below, and make sure you don't reference any path that does not exist.
+      Note: it's possible to ignore broken links with the 'onBrokenLinks' Docusaurus configuration, and let the build pass.
+
+      Exhaustive list of all broken links found:
+      - Broken link on source page path = /page1:
+         -> linking to /page2
+      - Broken link on source page path = /page2/:
+         -> linking to /page2
+      "
+    `);
+  });
+
+  it('rejects broken link due to strict exact matching', async () => {
+    await expect(() =>
+      testBrokenLinks({
+        routes: [
+          {path: '/page1', exact: true, strict: true},
+          {path: '/page2/', exact: true, strict: true},
+        ],
+
+        collectedLinks: {
+          '/page1': {
+            links: ['/page1', '/page1/', '/page2', '/page2/'],
+            anchors: [],
+          },
+          '/page2/': {
+            links: ['/page1', '/page1/', '/page2', '/page2/'],
+            anchors: [],
+          },
+        },
+      }),
+    ).rejects.toThrowErrorMatchingInlineSnapshot(`
+      "Docusaurus found broken links!
+
+      Please check the pages of your site in the list below, and make sure you don't reference any path that does not exist.
+      Note: it's possible to ignore broken links with the 'onBrokenLinks' Docusaurus configuration, and let the build pass.
+
+      Exhaustive list of all broken links found:
+      - Broken link on source page path = /page1:
+         -> linking to /page1/
+         -> linking to /page2
+      - Broken link on source page path = /page2/:
+         -> linking to /page1/
+         -> linking to /page2
+      "
+    `);
+  });
+
  it('rejects broken link with anchor', async () => {
    await expect(() =>
      testBrokenLinks({
@ -728,6 +849,10 @@ describe('handleBrokenLinks', () => {
    const routes: SimpleRoute[] = [
      ...Array.from<SimpleRoute>({length: scale}).map((_, i) => ({
        path: `/page${i}`,
+        exact: true,
+      })),
+      ...Array.from<SimpleRoute>({length: scale}).map((_, i) => ({
+        path: `/page/nonExact/${i}`,
      })),
      ...Array.from<SimpleRoute>({length: scale}).fill({
        path: '/pageDynamic/:subpath1',
@ -741,6 +866,7 @@ describe('handleBrokenLinks', () => {
          links: [
            ...Array.from<SimpleRoute>({length: scale}).flatMap((_2, j) => [
              `/page${j}`,
+              `/page/nonExact/${j}`,
              `/page${j}?age=42`,
              `/page${j}#anchor${j}`,
              `/page${j}?age=42#anchor${j}`,
@ -770,9 +896,9 @@ describe('handleBrokenLinks', () => {
    // See https://github.com/facebook/docusaurus/issues/9754
    // See https://twitter.com/sebastienlorber/status/1749392773415858587
    // We expect no more matchRoutes calls than number of dynamic route links
-    expect(matchRoutesMock).toHaveBeenCalledTimes(scale);
+    expect(matchRoutesMock).toHaveBeenCalledTimes(scale * 2);
    // We expect matchRoutes to be called with a reduced number of routes
-    expect(routes).toHaveLength(scale * 2);
-    expect(matchRoutesMock.mock.calls[0]![0]).toHaveLength(scale);
+    expect(routes).toHaveLength(scale * 3);
+    expect(matchRoutesMock.mock.calls[0]![0]).toHaveLength(scale * 2);
  });
 });
--- a/packages/docusaurus/src/server/brokenLinks.ts
+++ b/packages/docusaurus/src/server/brokenLinks.ts
@ -8,7 +8,13 @@
 import _ from 'lodash';
 import logger from '@docusaurus/logger';
 import {matchRoutes as reactRouterMatchRoutes} from 'react-router-config';
-import {parseURLPath, serializeURLPath, type URLPath} from '@docusaurus/utils';
+import {
+  addTrailingSlash,
+  parseURLPath,
+  removeTrailingSlash,
+  serializeURLPath,
+  type URLPath,
+} from '@docusaurus/utils';
 import {getAllFinalRoutes} from './utils';
 import type {RouteConfig, ReportingSeverity} from '@docusaurus/types';

@ -55,16 +61,37 @@ function createBrokenLinksHelper({
 }): BrokenLinksHelper {
  const validPathnames = new Set(collectedLinks.keys());

+  // IMPORTANT: this is an optimization
+  // See https://github.com/facebook/docusaurus/issues/9754
  // Matching against the route array can be expensive
  // If the route is already in the valid pathnames,
-  // we can avoid matching against it as an optimization
-  const remainingRoutes = routes.filter(
-    (route) => !validPathnames.has(route.path),
-  );
+  // we can avoid matching against it
+  const remainingRoutes = (function filterRoutes() {
+    // Goal: unit tests should behave the same with this enabled or disabled
+    const disableOptimization = false;
+    if (disableOptimization) {
+      return routes;
+    }
+    // We must consider the "exact" and "strict" match attribute
+    // We can only infer pre-validated pathnames from a route from exact routes
+    const [validPathnameRoutes, otherRoutes] = _.partition(
+      routes,
+      (route) => route.exact && validPathnames.has(route.path),
+    );
+    // If a route is non-strict (non-sensitive to trailing slashes)
+    // We must pre-validate all possible paths
+    validPathnameRoutes.forEach((validPathnameRoute) => {
+      if (!validPathnameRoute.strict) {
+        validPathnames.add(addTrailingSlash(validPathnameRoute.path));
+        validPathnames.add(removeTrailingSlash(validPathnameRoute.path));
+      }
+    });
+    return otherRoutes;
+  })();

  function isPathnameMatchingAnyRoute(pathname: string): boolean {
    if (matchRoutes(remainingRoutes, pathname).length > 0) {
-      // IMPORTANT: this is an optimization here
+      // IMPORTANT: this is an optimization
      // See https://github.com/facebook/docusaurus/issues/9754
      // Large Docusaurus sites have many routes!
      // We try to minimize calls to a possibly expensive matchRoutes function