feat(v2): broken links detection (#3059)

* add broken links checker

* polish

* finalize broken links detection feature

* note broken links is only for prod build

* fix broken link on template

* fix test snapshot

* fix bad merge
This commit is contained in:
Sébastien Lorber 2020-07-21 19:13:34 +02:00 committed by GitHub
parent f4434b2e42
commit 8ff28e3fe4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 421 additions and 30 deletions

View file

@ -63,8 +63,6 @@ Strikethrough uses two tildes. ~~Scratch this.~~
[I'm a reference-style link][arbitrary case-insensitive reference text]
[I'm a relative reference to a repository file](../blob/master/LICENSE)
[You can use numbers for reference-style link definitions][1]
Or leave it empty and use the [link text itself].

View file

@ -3,6 +3,7 @@ module.exports = {
tagline: 'The tagline of my site',
url: 'https://your-docusaurus-test-site.com',
baseUrl: '/',
onBrokenLinks: 'throw',
favicon: 'img/favicon.ico',
organizationName: 'facebook', // Usually your GitHub org/user name.
projectName: 'docusaurus', // Usually your repo name.

View file

@ -63,8 +63,6 @@ Strikethrough uses two tildes. ~~Scratch this.~~
[I'm a reference-style link][arbitrary case-insensitive reference text]
[I'm a relative reference to a repository file](../blob/master/LICENSE)
[You can use numbers for reference-style link definitions][1]
Or leave it empty and use the [link text itself].

View file

@ -3,6 +3,7 @@ module.exports = {
tagline: 'The tagline of my site',
url: 'https://your-docusaurus-test-site.com',
baseUrl: '/',
onBrokenLinks: 'throw',
favicon: 'img/favicon.ico',
organizationName: 'facebook', // Usually your GitHub org/user name.
projectName: 'docusaurus', // Usually your repo name.

View file

@ -63,8 +63,6 @@ Strikethrough uses two tildes. ~~Scratch this.~~
[I'm a reference-style link][arbitrary case-insensitive reference text]
[I'm a relative reference to a repository file](../blob/master/LICENSE)
[You can use numbers for reference-style link definitions][1]
Or leave it empty and use the [link text itself].

View file

@ -12,6 +12,7 @@ module.exports = {
tagline: 'The tagline of my site',
url: 'https://your-docusaurus-test-site.com',
baseUrl: '/',
onBrokenLinks: 'throw',
favicon: 'img/favicon.ico',
organizationName: 'facebook', // Usually your GitHub org/user name.
projectName: 'docusaurus', // Usually your repo name.

View file

@ -10,12 +10,15 @@ import {Command} from 'commander';
import {ParsedUrlQueryInput} from 'querystring';
import {MergeStrategy} from 'webpack-merge';
/**
 * What to do when the production build detects broken links:
 * - 'ignore': skip the check entirely
 * - 'log': print the report with console.log
 * - 'error': print the report with console.error
 * - 'throw': throw an Error containing the report
 */
export type OnBrokenLinks = 'ignore' | 'log' | 'error' | 'throw';
export interface DocusaurusConfig {
baseUrl: string;
favicon: string;
tagline?: string;
title: string;
url: string;
onBrokenLinks: OnBrokenLinks;
organizationName?: string;
projectName?: string;
githubHost?: string;
@ -111,6 +114,7 @@ export interface InjectedHtmlTags {
export type HtmlTags = string | HtmlTagObject | (string | HtmlTagObject)[];
/**
 * Site props: the load context and injected HTML tags, plus the routes and
 * plugins of the site.
 */
export interface Props extends LoadContext, InjectedHtmlTags {
// Route configs of the site (these are what the broken links checker matches links against).
routes: RouteConfig[];
// Route paths as plain strings.
routesPaths: string[];
plugins: Plugin<any, unknown>[];
}

View file

@ -73,6 +73,7 @@
"import-fresh": "^3.2.1",
"inquirer": "^7.2.0",
"is-root": "^2.1.0",
"lodash": "^4.5.2",
"lodash.has": "^4.5.2",
"lodash.isplainobject": "^4.0.6",
"lodash.isstring": "^4.0.1",
@ -90,6 +91,7 @@
"react-router": "^5.1.2",
"react-router-config": "^5.1.1",
"react-router-dom": "^5.1.2",
"resolve-pathname": "^3.0.0",
"semver": "^6.3.0",
"serve-handler": "^6.1.3",
"shelljs": "^0.8.4",

View file

@ -0,0 +1,50 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import React, {ReactNode, useContext, createContext} from 'react';
/** Minimal collector API: the only thing a link component needs is to report a link. */
type LinksCollector = {
  collectLink: (link: string) => void;
};

/** Collector that also exposes everything that has been reported so far. */
type StatefulLinksCollector = LinksCollector & {
  getCollectedLinks: () => string[];
};

/**
 * Creates a collector with its own private storage.
 *
 * Links are stored in a Set, so reporting the same link several times keeps
 * a single entry. `getCollectedLinks` returns a fresh array on every call.
 */
export const createStatefulLinksCollector = (): StatefulLinksCollector => {
  const uniqueLinks = new Set<string>();

  const collectLink = (link: string): void => {
    uniqueLinks.add(link);
  };

  const getCollectedLinks = (): string[] => Array.from(uniqueLinks);

  return {collectLink, getCollectedLinks};
};
// Default collector: does nothing with the links it receives.
// This is what the client gets, as the broken links checker is only used
// server-side.
const noopLinksCollector: LinksCollector = {
  collectLink: (): void => undefined,
};

const Context = createContext<LinksCollector>(noopLinksCollector);
/**
 * Hook returning the links collector provided by the nearest
 * ProvideLinksCollector, or the context default when there is none.
 */
export const useLinksCollector = (): LinksCollector => useContext(Context);
/**
 * Makes the given links collector available to every descendant through
 * useLinksCollector().
 */
export const ProvideLinksCollector = (props: {
  children: ReactNode;
  linksCollector: LinksCollector;
}) => (
  <Context.Provider value={props.linksCollector}>
    {props.children}
  </Context.Provider>
);

View file

@ -10,6 +10,7 @@ import React, {ReactNode, useEffect, useRef} from 'react';
import {NavLink, Link as RRLink} from 'react-router-dom';
import isInternalUrl from './isInternalUrl';
import ExecutionEnvironment from './ExecutionEnvironment';
import {useLinksCollector} from '../LinksCollector';
declare global {
interface Window {
@ -26,6 +27,7 @@ interface Props {
}
function Link({isNavLink, activeClassName, ...props}: Props): JSX.Element {
const linksCollector = useLinksCollector();
const {to, href} = props;
const targetLink = to || href;
const isInternal = isInternalUrl(targetLink);
@ -84,7 +86,14 @@ function Link({isNavLink, activeClassName, ...props}: Props): JSX.Element {
};
}, [targetLink, IOSupported, isInternal]);
return !targetLink || !isInternal || targetLink.startsWith('#') ? (
const isAnchorLink = targetLink?.startsWith('#') ?? false;
const isRegularHtmlLink = !targetLink || !isInternal || isAnchorLink;
if (isInternal && !isAnchorLink) {
linksCollector.collectLink(targetLink);
}
return isRegularHtmlLink ? (
// eslint-disable-next-line jsx-a11y/anchor-has-content
<a
// @ts-expect-error: href specified twice needed to pass children and other user specified props

View file

@ -22,22 +22,38 @@ import packageJson from '../../package.json';
import preload from './preload';
// eslint-disable-next-line import/no-unresolved
import App from './App';
import {
createStatefulLinksCollector,
ProvideLinksCollector,
} from './LinksCollector';
import ssrTemplate from './templates/ssr.html.template';
// Renderer for static-site-generator-webpack-plugin (async rendering via promises).
export default async function render(locals) {
const {routesLocation, headTags, preBodyTags, postBodyTags} = locals;
const {
routesLocation,
headTags,
preBodyTags,
postBodyTags,
onLinksCollected,
baseUrl,
} = locals;
const location = routesLocation[locals.path];
await preload(routes, location);
const modules = new Set();
const context = {};
const linksCollector = createStatefulLinksCollector();
const appHtml = ReactDOMServer.renderToString(
<Loadable.Capture report={(moduleName) => modules.add(moduleName)}>
<StaticRouter location={location} context={context}>
<App />
<ProvideLinksCollector linksCollector={linksCollector}>
<App />
</ProvideLinksCollector>
</StaticRouter>
</Loadable.Capture>,
);
onLinksCollected(location, linksCollector.getCollectedLinks());
const helmet = Helmet.renderStatic();
const htmlAttributes = helmet.htmlAttributes.toString();
@ -59,7 +75,6 @@ export default async function render(locals) {
const bundles = getBundles(manifest, modulesToBeLoaded);
const stylesheets = (bundles.css || []).map((b) => b.file);
const scripts = (bundles.js || []).map((b) => b.file);
const {baseUrl} = locals;
const renderedHtml = eta.render(
ssrTemplate.trim(),

View file

@ -15,6 +15,8 @@ import {BundleAnalyzerPlugin} from 'webpack-bundle-analyzer';
import merge from 'webpack-merge';
import {STATIC_DIR_NAME} from '../constants';
import {load} from '../server';
import {handleBrokenLinks} from '../server/brokenLinks';
import {BuildCLIOptions, Props} from '@docusaurus/types';
import createClientConfig from '../webpack/client';
import createServerConfig from '../webpack/server';
@ -33,7 +35,13 @@ export default async function build(
const props: Props = await load(siteDir, cliOptions.outDir);
// Apply user webpack config.
const {outDir, generatedFilesDir, plugins} = props;
const {
outDir,
generatedFilesDir,
plugins,
siteConfig: {onBrokenLinks},
routes,
} = props;
const clientManifestPath = path.join(
generatedFilesDir,
@ -55,7 +63,14 @@ export default async function build(
},
);
let serverConfig: Configuration = createServerConfig(props);
const allCollectedLinks: Record<string, string[]> = {};
let serverConfig: Configuration = createServerConfig({
props,
onLinksCollected: (staticPagePath, links) => {
allCollectedLinks[staticPagePath] = links;
},
});
const staticDir = path.resolve(siteDir, STATIC_DIR_NAME);
if (fs.existsSync(staticDir)) {
@ -124,6 +139,8 @@ export default async function build(
}),
);
handleBrokenLinks({allCollectedLinks, routes, onBrokenLinks});
const relativeDir = path.relative(process.cwd(), outDir);
console.log(
`\n${chalk.green('Success!')} Generated static files in ${chalk.cyan(
@ -135,5 +152,6 @@ export default async function build(
if (forceTerminate && !cliOptions.bundleAnalyzer) {
process.exit(0);
}
return outDir;
}

View file

@ -0,0 +1,13 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`brokenLinks getBrokenLinksErrorMessage 1`] = `
"Broken links found!
- Page path = /docs/mySourcePage:
-> link to ./myBrokenLink (resolved as: /docs/myBrokenLink)
-> link to ../otherBrokenLink (resolved as: /otherBrokenLink),
- Page path = /otherSourcePage:
-> link to /badLink
"
`;

View file

@ -20,6 +20,7 @@ Object {
"baseUrl": "/",
"customFields": Object {},
"favicon": "img/docusaurus.ico",
"onBrokenLinks": "throw",
"organizationName": "endiliey",
"plugins": Array [
Array [

View file

@ -0,0 +1,108 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {getBrokenLinksErrorMessage, getAllBrokenLinks} from '../brokenLinks';
import {RouteConfig} from '@docusaurus/types';
// Unit tests for the broken links detection helpers.
describe('brokenLinks', () => {
// Snapshot test of the report text built from already-detected broken links.
test('getBrokenLinksErrorMessage', async () => {
const message = getBrokenLinksErrorMessage({
'/docs/mySourcePage': [
{link: './myBrokenLink', resolvedLink: '/docs/myBrokenLink'},
{link: '../otherBrokenLink', resolvedLink: '/otherBrokenLink'},
],
'/otherSourcePage': [{link: '/badLink', resolvedLink: '/badLink'}],
});
expect(message).toMatchSnapshot();
});
// Checks detection against a small route tree, using relative and absolute
// links with and without query strings / hashes.
test('getAllBrokenLinks', async () => {
// Route tree: a parent /docs route with two sub-routes, a flat /community
// route, and the match-all '*' route.
const routes: RouteConfig[] = [
{
path: '/docs',
component: '',
routes: [
{path: '/docs/someDoc', component: ''},
{path: '/docs/someOtherDoc', component: ''},
],
},
{
path: '/community',
component: '',
},
{
path: '*',
component: '',
},
];
// Links collected per page path (key = page on which the links were found).
const allCollectedLinks = {
'/docs/someDoc': [
// Good links
'./someOtherDoc#someHash',
'/docs/someOtherDoc?someQueryString=true#someHash',
'../docs/someOtherDoc?someQueryString=true',
'../docs/someOtherDoc#someHash',
// Bad links
'../someOtherDoc',
'./docThatDoesNotExist',
'./badRelativeLink',
'../badRelativeLink',
],
'/community': [
// Good links
'/docs/someDoc',
'/docs/someOtherDoc#someHash',
'./docs/someDoc#someHash',
'./docs/someOtherDoc',
// Bad links
'/someOtherDoc',
'/badLink',
'./badLink',
],
};
// Only the "Bad links" above are expected, each with its original form and
// the pathname it resolves to relative to its page.
const expectedBrokenLinks = {
'/docs/someDoc': [
{
link: '../someOtherDoc',
resolvedLink: '/someOtherDoc',
},
{
link: './docThatDoesNotExist',
resolvedLink: '/docs/docThatDoesNotExist',
},
{
link: './badRelativeLink',
resolvedLink: '/docs/badRelativeLink',
},
{
link: '../badRelativeLink',
resolvedLink: '/badRelativeLink',
},
],
'/community': [
{
link: '/someOtherDoc',
resolvedLink: '/someOtherDoc',
},
{
link: '/badLink',
resolvedLink: '/badLink',
},
{
link: './badLink',
resolvedLink: '/badLink',
},
],
};
expect(getAllBrokenLinks({allCollectedLinks, routes})).toEqual(
expectedBrokenLinks,
);
});
});

View file

@ -0,0 +1,145 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {matchRoutes, RouteConfig as RRRouteConfig} from 'react-router-config';
import resolvePathname from 'resolve-pathname';
import chalk from 'chalk';
import {mapValues, pickBy, flatMap} from 'lodash';
import {RouteConfig, OnBrokenLinks} from '@docusaurus/types';
// Re-types the Docusaurus route configs as react-router-config route configs
// so they can be passed to matchRoutes. This is a cast only: the array is
// returned as-is, without any runtime conversion.
function toReactRouterRoutes(routes: RouteConfig[]): RRRouteConfig[] {
// @ts-expect-error: types incompatible???
return routes as RRRouteConfig[];
}
// A link that does not match any route.
type BrokenLink = {
// The link as it was collected on the page
link: string;
// The pathname obtained after resolving the link against the page path
resolvedLink: string;
};
// matchRoutes does not support query strings / anchors:
// keep only what comes before the first "?" or "#".
function onlyPathname(link: string) {
  const cutAt = link.search(/[?#]/);
  return cutAt === -1 ? link : link.slice(0, cutAt);
}
/**
 * Returns the links of a single page that do not match any of the given routes.
 *
 * matchRoutes does not resolve relative links (like ./../somePath) by itself,
 * so every link is first stripped of its query string / anchor and resolved
 * against the page path with resolvePathname (the resolver ReactRouter uses
 * internally) before being matched.
 */
function getPageBrokenLinks({
  pagePath,
  pageLinks,
  routes,
}: {
  pagePath: string;
  pageLinks: string[];
  routes: RouteConfig[];
}): BrokenLink[] {
  const reactRouterRoutes = toReactRouterRoutes(routes);
  const brokenLinks: BrokenLink[] = [];

  pageLinks.forEach((link) => {
    const resolvedLink = resolvePathname(onlyPathname(link), pagePath);
    const hasNoMatch = matchRoutes(reactRouterRoutes, resolvedLink).length === 0;
    if (hasNoMatch) {
      brokenLinks.push({link, resolvedLink});
    }
  });

  return brokenLinks;
}
// Route definitions are recursive and a parent route can act as a match-all
// for its subtree: a broken link like /docs/brokenLink must not be considered
// valid just because it matches /docs/*.
// So we flatten the tree and keep only the "final routes" (the ones without
// subroutes), after dropping the top-level match-all 404 route ("*").
function filterIntermediateRoutes(routesInput: RouteConfig[]): RouteConfig[] {
  function collectLeafRoutes(route: RouteConfig): RouteConfig[] {
    if (!route.routes) {
      return [route];
    }
    return flatMap(route.routes, collectLeafRoutes);
  }

  const topLevelRoutes = routesInput.filter(({path}) => path !== '*');
  return flatMap(topLevelRoutes, collectLeafRoutes);
}
/**
 * Computes the broken links of every page.
 *
 * @param allCollectedLinks - Collected links, keyed by the path of the page they were found on.
 * @param routes - Route configs the links are matched against.
 * @returns Broken links keyed by page path; pages without any broken link are omitted.
 */
export function getAllBrokenLinks({
  allCollectedLinks,
  routes,
}: {
  allCollectedLinks: Record<string, string[]>;
  routes: RouteConfig[];
}): Record<string, BrokenLink[]> {
  const finalRoutes = filterIntermediateRoutes(routes);

  const brokenLinksByPage = mapValues(allCollectedLinks, (pageLinks, pagePath) =>
    getPageBrokenLinks({pagePath, pageLinks, routes: finalRoutes}),
  );

  // Drop the pages on which every link is fine
  return pickBy(
    brokenLinksByPage,
    (pageBrokenLinks) => pageBrokenLinks.length > 0,
  );
}
/**
 * Builds the human-readable report listing every broken link, grouped by the
 * page on which it was found.
 *
 * @param allBrokenLinks - Broken links keyed by the path of the page containing them.
 * @returns The report text, or `undefined` when the record has no entry.
 */
export function getBrokenLinksErrorMessage(
  allBrokenLinks: Record<string, BrokenLink[]>,
): string | undefined {
  const pages = Object.entries(allBrokenLinks);
  if (pages.length === 0) {
    return undefined;
  }

  // The resolved form is only worth showing when it differs from the raw link
  function brokenLinkMessage(brokenLink: BrokenLink): string {
    const showResolvedLink = brokenLink.link !== brokenLink.resolvedLink;
    return `${brokenLink.link}${
      showResolvedLink ? ` (resolved as: ${brokenLink.resolvedLink})` : ''
    }`;
  }

  function pageBrokenLinksMessage(
    pagePath: string,
    brokenLinks: BrokenLink[],
  ): string {
    return `\n\n- Page path = ${pagePath}:\n -> link to ${brokenLinks
      .map(brokenLinkMessage)
      .join('\n -> link to ')}`;
  }

  // Join explicitly: interpolating the array itself into a template literal
  // stringifies it with "," separators, which leaves a stray comma after
  // every page section but the last one.
  const pagesMessage = pages
    .map(([pagePath, brokenLinks]) =>
      pageBrokenLinksMessage(pagePath, brokenLinks),
    )
    .join('');

  return `Broken links found!${pagesMessage}\n`;
}
/**
 * Checks the collected links against the routes and reports the broken ones
 * according to the `onBrokenLinks` setting.
 *
 * - 'ignore': returns immediately, nothing is checked
 * - 'log': prints the report with console.log
 * - 'error': prints the report with console.error
 * - 'throw': throws an Error containing the report
 *
 * @throws Error when broken links are found and `onBrokenLinks` is 'throw',
 * or is not one of the supported values.
 */
export function handleBrokenLinks({
  allCollectedLinks,
  onBrokenLinks,
  routes,
}: {
  allCollectedLinks: Record<string, string[]>;
  onBrokenLinks: OnBrokenLinks;
  routes: RouteConfig[];
}) {
  if (onBrokenLinks === 'ignore') {
    return;
  }

  const errorMessage = getBrokenLinksErrorMessage(
    getAllBrokenLinks({allCollectedLinks, routes}),
  );
  if (!errorMessage) {
    // No broken link: nothing to report
    return;
  }

  switch (onBrokenLinks) {
    case 'throw':
      // Useful to ensure the CI fails in case of broken link
      throw new Error(
        `${errorMessage}\nNote: it's possible to ignore broken links with the 'onBrokenLinks' Docusaurus configuration.`,
      );
    case 'error':
      console.error(chalk.red(errorMessage));
      break;
    case 'log':
      console.log(chalk.blue(errorMessage));
      break;
    default:
      throw new Error(`unexpected onBrokenLinks value=${onBrokenLinks}`);
  }
}

View file

@ -5,21 +5,20 @@
* LICENSE file in the root directory of this source tree.
*/
import {PluginConfig, DocusaurusConfig} from '@docusaurus/types';
import {DocusaurusConfig} from '@docusaurus/types';
import Joi from '@hapi/joi';
import {CONFIG_FILE_NAME} from '../constants';
export const DEFAULT_CONFIG: {
plugins: PluginConfig[];
themes: PluginConfig[];
presets: PluginConfig[];
customFields: {
[key: string]: unknown;
};
themeConfig: {
[key: string]: unknown;
};
} = {
export const DEFAULT_CONFIG: Pick<
DocusaurusConfig,
| 'onBrokenLinks'
| 'plugins'
| 'themes'
| 'presets'
| 'customFields'
| 'themeConfig'
> = {
onBrokenLinks: 'throw',
plugins: [],
themes: [],
presets: [],
@ -50,6 +49,9 @@ const ConfigSchema = Joi.object({
favicon: Joi.string().required(),
title: Joi.string().required(),
url: Joi.string().uri().required(),
onBrokenLinks: Joi.string()
.equal('ignore', 'log', 'error', 'throw')
.default(DEFAULT_CONFIG.onBrokenLinks),
organizationName: Joi.string(),
projectName: Joi.string(),
customFields: Joi.object().unknown().default(DEFAULT_CONFIG.customFields),

View file

@ -223,6 +223,7 @@ ${Object.keys(registry)
outDir,
baseUrl,
generatedFilesDir,
routes: pluginsRouteConfigs,
routesPaths,
plugins,
headTags,

View file

@ -14,7 +14,7 @@ describe('webpack production config', () => {
test('simple', async () => {
console.log = jest.fn();
const props = await loadSetup('simple');
const config = createServerConfig(props);
const config = createServerConfig({props});
const errors = validate(config);
expect(errors.length).toBe(0);
});
@ -22,7 +22,7 @@ describe('webpack production config', () => {
test('custom', async () => {
console.log = jest.fn();
const props = await loadSetup('custom');
const config = createServerConfig(props);
const config = createServerConfig({props});
const errors = validate(config);
expect(errors.length).toBe(0);
});

View file

@ -15,10 +15,15 @@ import {createBaseConfig} from './base';
import WaitPlugin from './plugins/WaitPlugin';
import LogPlugin from './plugins/LogPlugin';
export default function createServerConfig(
props: Props,
minify: boolean = true,
): Configuration {
export default function createServerConfig({
props,
minify = true,
onLinksCollected = () => {},
}: {
props: Props;
minify?: boolean;
onLinksCollected?: (staticPagePath: string, links: string[]) => void;
}): Configuration {
const {
baseUrl,
routesPaths,
@ -64,6 +69,7 @@ export default function createServerConfig(
headTags,
preBodyTags,
postBodyTags,
onLinksCollected,
},
paths: ssgPaths,
}),

View file

@ -79,6 +79,20 @@ module.exports = {
## Optional fields
### `onBrokenLinks`
- Type: `'ignore' | 'log' | 'error' | 'throw'`
The behavior of Docusaurus when it detects a broken link.
By default, it throws an error to ensure you never ship a broken link, but you can relax this check if needed.
:::note
The broken links detection is only available for a production build (`docusaurus build`).
:::
### `tagline`
- Type: `string`

View file

@ -21,6 +21,7 @@ module.exports = {
projectName: 'docusaurus',
baseUrl: '/',
url: 'https://v2.docusaurus.io',
onBrokenLinks: 'throw',
favicon: 'img/docusaurus.ico',
customFields: {
description:

View file

@ -11688,6 +11688,11 @@ lodash@^4.15.0, lodash@^4.17.11, lodash@^4.17.12, lodash@^4.17.13, lodash@^4.17.
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.15.tgz#b447f6670a0455bbfeedd11392eff330ea097548"
integrity sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==
lodash@^4.5.2:
version "4.17.19"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.19.tgz#e48ddedbe30b3321783c5b4301fbd353bc1e4a4b"
integrity sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ==
log-symbols@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/log-symbols/-/log-symbols-1.0.2.tgz#376ff7b58ea3086a0f09facc74617eca501e1a18"