fix(sitemap): exclude pages with robots noindex from sitemap (#7143)

This commit is contained in:
Joshua Chen 2022-04-14 17:31:09 +08:00 committed by GitHub
parent 6306cbc266
commit 03516dc3a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 105 additions and 41 deletions

View file

@ -9,8 +9,6 @@ import type {LoadContext, Plugin} from '@docusaurus/types';
import {docuHash, normalizeUrl, posixPath} from '@docusaurus/utils'; import {docuHash, normalizeUrl, posixPath} from '@docusaurus/utils';
import path from 'path'; import path from 'path';
export const routeBasePath = '__docusaurus/debug';
export default function pluginDebug({ export default function pluginDebug({
siteConfig: {baseUrl}, siteConfig: {baseUrl},
generatedFilesDir, generatedFilesDir,
@ -42,37 +40,37 @@ export default function pluginDebug({
// Home is config (duplicate for now) // Home is config (duplicate for now)
addRoute({ addRoute({
path: normalizeUrl([baseUrl, routeBasePath]), path: normalizeUrl([baseUrl, '__docusaurus/debug']),
component: '@theme/DebugConfig', component: '@theme/DebugConfig',
exact: true, exact: true,
}); });
addRoute({ addRoute({
path: normalizeUrl([baseUrl, routeBasePath, 'config']), path: normalizeUrl([baseUrl, '__docusaurus/debug/config']),
component: '@theme/DebugConfig', component: '@theme/DebugConfig',
exact: true, exact: true,
}); });
addRoute({ addRoute({
path: normalizeUrl([baseUrl, routeBasePath, 'metadata']), path: normalizeUrl([baseUrl, '__docusaurus/debug/metadata']),
component: '@theme/DebugSiteMetadata', component: '@theme/DebugSiteMetadata',
exact: true, exact: true,
}); });
addRoute({ addRoute({
path: normalizeUrl([baseUrl, routeBasePath, 'registry']), path: normalizeUrl([baseUrl, '__docusaurus/debug/registry']),
component: '@theme/DebugRegistry', component: '@theme/DebugRegistry',
exact: true, exact: true,
}); });
addRoute({ addRoute({
path: normalizeUrl([baseUrl, routeBasePath, 'routes']), path: normalizeUrl([baseUrl, '__docusaurus/debug/routes']),
component: '@theme/DebugRoutes', component: '@theme/DebugRoutes',
exact: true, exact: true,
}); });
addRoute({ addRoute({
path: normalizeUrl([baseUrl, routeBasePath, 'content']), path: normalizeUrl([baseUrl, '__docusaurus/debug/content']),
component: '@theme/DebugContent', component: '@theme/DebugContent',
exact: true, exact: true,
modules: { modules: {
@ -81,7 +79,7 @@ export default function pluginDebug({
}); });
addRoute({ addRoute({
path: normalizeUrl([baseUrl, routeBasePath, 'globalData']), path: normalizeUrl([baseUrl, '__docusaurus/debug/globalData']),
component: '@theme/DebugGlobalData', component: '@theme/DebugGlobalData',
exact: true, exact: true,
}); });

View file

@ -7,10 +7,6 @@
/// <reference types="@docusaurus/module-type-aliases" /> /// <reference types="@docusaurus/module-type-aliases" />
declare module '@docusaurus/plugin-debug' {
export const routeBasePath: string;
}
declare module '@theme/DebugConfig' { declare module '@theme/DebugConfig' {
export default function DebugMetadata(): JSX.Element; export default function DebugMetadata(): JSX.Element;
} }

View file

@ -35,6 +35,7 @@ export default function DebugLayout({
<Head> <Head>
<html lang="en" /> <html lang="en" />
<title>Docusaurus debug panel</title> <title>Docusaurus debug panel</title>
<meta name="robots" content="noindex" />
</Head> </Head>
<div> <div>

View file

@ -5,6 +5,7 @@
* LICENSE file in the root directory of this source tree. * LICENSE file in the root directory of this source tree.
*/ */
import React from 'react';
import createSitemap from '../createSitemap'; import createSitemap from '../createSitemap';
import type {DocusaurusConfig} from '@docusaurus/types'; import type {DocusaurusConfig} from '@docusaurus/types';
import {EnumChangefreq} from 'sitemap'; import {EnumChangefreq} from 'sitemap';
@ -16,6 +17,7 @@ describe('createSitemap', () => {
url: 'https://example.com', url: 'https://example.com',
} as DocusaurusConfig, } as DocusaurusConfig,
['/', '/test'], ['/', '/test'],
{},
{ {
changefreq: EnumChangefreq.DAILY, changefreq: EnumChangefreq.DAILY,
priority: 0.7, priority: 0.7,
@ -29,7 +31,7 @@ describe('createSitemap', () => {
it('empty site', () => it('empty site', () =>
expect(async () => { expect(async () => {
await createSitemap({} as DocusaurusConfig, [], {}); await createSitemap({} as DocusaurusConfig, [], {}, {});
}).rejects.toThrow( }).rejects.toThrow(
'URL in docusaurus.config.js cannot be empty/undefined.', 'URL in docusaurus.config.js cannot be empty/undefined.',
)); ));
@ -40,6 +42,7 @@ describe('createSitemap', () => {
url: 'https://example.com', url: 'https://example.com',
} as DocusaurusConfig, } as DocusaurusConfig,
['/', '/404.html', '/my-page'], ['/', '/404.html', '/my-page'],
{},
{ {
changefreq: EnumChangefreq.DAILY, changefreq: EnumChangefreq.DAILY,
priority: 0.7, priority: 0.7,
@ -55,6 +58,7 @@ describe('createSitemap', () => {
url: 'https://example.com', url: 'https://example.com',
} as DocusaurusConfig, } as DocusaurusConfig,
['/', '/search/', '/tags/', '/search/foo', '/tags/foo/bar'], ['/', '/search/', '/tags/', '/search/foo', '/tags/foo/bar'],
{},
{ {
changefreq: EnumChangefreq.DAILY, changefreq: EnumChangefreq.DAILY,
priority: 0.7, priority: 0.7,
@ -78,6 +82,7 @@ describe('createSitemap', () => {
trailingSlash: undefined, trailingSlash: undefined,
} as DocusaurusConfig, } as DocusaurusConfig,
['/', '/test', '/nested/test', '/nested/test2/'], ['/', '/test', '/nested/test', '/nested/test2/'],
{},
{ {
changefreq: EnumChangefreq.DAILY, changefreq: EnumChangefreq.DAILY,
priority: 0.7, priority: 0.7,
@ -98,6 +103,7 @@ describe('createSitemap', () => {
trailingSlash: true, trailingSlash: true,
} as DocusaurusConfig, } as DocusaurusConfig,
['/', '/test', '/nested/test', '/nested/test2/'], ['/', '/test', '/nested/test', '/nested/test2/'],
{},
{ {
changefreq: EnumChangefreq.DAILY, changefreq: EnumChangefreq.DAILY,
priority: 0.7, priority: 0.7,
@ -118,6 +124,7 @@ describe('createSitemap', () => {
trailingSlash: false, trailingSlash: false,
} as DocusaurusConfig, } as DocusaurusConfig,
['/', '/test', '/nested/test', '/nested/test2/'], ['/', '/test', '/nested/test', '/nested/test2/'],
{},
{ {
changefreq: EnumChangefreq.DAILY, changefreq: EnumChangefreq.DAILY,
priority: 0.7, priority: 0.7,
@ -130,4 +137,30 @@ describe('createSitemap', () => {
expect(sitemap).toContain('<loc>https://example.com/nested/test</loc>'); expect(sitemap).toContain('<loc>https://example.com/nested/test</loc>');
expect(sitemap).toContain('<loc>https://example.com/nested/test2</loc>'); expect(sitemap).toContain('<loc>https://example.com/nested/test2</loc>');
}); });
// Verifies that a route whose collected head state carries a
// <meta name="robots" content="noindex"> element is left out of the sitemap.
it('filters pages with noindex', async () => {
const sitemap = await createSitemap(
{
url: 'https://example.com',
trailingSlash: false,
} as DocusaurusConfig,
// Route paths handed to the sitemap generator; only '/noindex' has an entry
// in the head map below.
['/', '/noindex', '/nested/test', '/nested/test2/'],
// Head map keyed by route path. Only the `meta.toComponent()` accessor is
// stubbed here; it returns the React elements standing in for the page's
// meta tags.
{
'/noindex': {
meta: {
toComponent: () => [
React.createElement('meta', {name: 'robots', content: 'noindex'}),
],
},
},
},
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
ignorePatterns: [],
},
);
// Substring check on the serialized sitemap: the '/noindex' path must not
// appear anywhere in the output.
// NOTE(review): only the exact content value 'noindex' is exercised; combined
// directives such as 'noindex, nofollow' and the routes that should remain
// listed are not asserted here — consider adding cases.
expect(sitemap).not.toContain('/noindex');
});
}); });

View file

@ -10,10 +10,13 @@ import type {PluginOptions} from '@docusaurus/plugin-sitemap';
import type {DocusaurusConfig} from '@docusaurus/types'; import type {DocusaurusConfig} from '@docusaurus/types';
import {applyTrailingSlash} from '@docusaurus/utils-common'; import {applyTrailingSlash} from '@docusaurus/utils-common';
import {createMatcher} from '@docusaurus/utils'; import {createMatcher} from '@docusaurus/utils';
import type {HelmetServerState} from 'react-helmet-async';
import type {ReactElement} from 'react';
export default async function createSitemap( export default async function createSitemap(
siteConfig: DocusaurusConfig, siteConfig: DocusaurusConfig,
routesPaths: string[], routesPaths: string[],
head: {[location: string]: HelmetServerState},
options: PluginOptions, options: PluginOptions,
): Promise<string> { ): Promise<string> {
const {url: hostname} = siteConfig; const {url: hostname} = siteConfig;
@ -26,18 +29,29 @@ export default async function createSitemap(
const sitemapStream = new SitemapStream({hostname}); const sitemapStream = new SitemapStream({hostname});
routesPaths function routeShouldBeIncluded(route: string) {
.filter((route) => !route.endsWith('404.html') && !ignoreMatcher(route)) if (route.endsWith('404.html') || ignoreMatcher(route)) {
.forEach((routePath) => return false;
sitemapStream.write({ }
url: applyTrailingSlash(routePath, { // https://github.com/staylor/react-helmet-async/pull/167
trailingSlash: siteConfig.trailingSlash, const meta = head[route]?.meta.toComponent() as unknown as
baseUrl: siteConfig.baseUrl, | ReactElement[]
}), | undefined;
changefreq, return !meta?.some(
priority, (tag) => tag.props.name === 'robots' && tag.props.content === 'noindex',
}),
); );
}
routesPaths.filter(routeShouldBeIncluded).forEach((routePath) =>
sitemapStream.write({
url: applyTrailingSlash(routePath, {
trailingSlash: siteConfig.trailingSlash,
baseUrl: siteConfig.baseUrl,
}),
changefreq,
priority,
}),
);
sitemapStream.end(); sitemapStream.end();

View file

@ -18,7 +18,7 @@ export default function pluginSitemap(
return { return {
name: 'docusaurus-plugin-sitemap', name: 'docusaurus-plugin-sitemap',
async postBuild({siteConfig, routesPaths, outDir}) { async postBuild({siteConfig, routesPaths, outDir, head}) {
if (siteConfig.noIndex) { if (siteConfig.noIndex) {
return; return;
} }
@ -26,6 +26,7 @@ export default function pluginSitemap(
const generatedSitemap = await createSitemap( const generatedSitemap = await createSitemap(
siteConfig, siteConfig,
routesPaths, routesPaths,
head,
options, options,
); );

View file

@ -5,7 +5,6 @@
* LICENSE file in the root directory of this source tree. * LICENSE file in the root directory of this source tree.
*/ */
import {routeBasePath as debugPluginRouteBasePath} from '@docusaurus/plugin-debug';
import type { import type {
Preset, Preset,
LoadContext, LoadContext,
@ -29,7 +28,7 @@ export default function preset(
opts: Options = {}, opts: Options = {},
): Preset { ): Preset {
const {siteConfig} = context; const {siteConfig} = context;
const {themeConfig, baseUrl} = siteConfig; const {themeConfig} = siteConfig;
const {algolia} = themeConfig as Partial<ThemeConfig>; const {algolia} = themeConfig as Partial<ThemeConfig>;
const isProd = process.env.NODE_ENV === 'production'; const isProd = process.env.NODE_ENV === 'production';
const { const {
@ -37,13 +36,12 @@ export default function preset(
docs, docs,
blog, blog,
pages, pages,
sitemap = {}, sitemap,
theme, theme,
googleAnalytics, googleAnalytics,
gtag, gtag,
...rest ...rest
} = opts; } = opts;
const isDebugEnabled = debug || (debug === undefined && !isProd);
const themes: PluginConfig[] = []; const themes: PluginConfig[] = [];
themes.push(makePluginConfig('@docusaurus/theme-classic', theme)); themes.push(makePluginConfig('@docusaurus/theme-classic', theme));
@ -76,17 +74,13 @@ export default function preset(
makePluginConfig('@docusaurus/plugin-google-analytics', googleAnalytics), makePluginConfig('@docusaurus/plugin-google-analytics', googleAnalytics),
); );
} }
if (isDebugEnabled) { if (debug || (debug === undefined && !isProd)) {
plugins.push(require.resolve('@docusaurus/plugin-debug')); plugins.push(require.resolve('@docusaurus/plugin-debug'));
} }
if (gtag) { if (gtag) {
plugins.push(makePluginConfig('@docusaurus/plugin-google-gtag', gtag)); plugins.push(makePluginConfig('@docusaurus/plugin-google-gtag', gtag));
} }
if (isProd && sitemap !== false) { if (isProd && sitemap !== false) {
if (isDebugEnabled) {
sitemap.ignorePatterns ??= [];
sitemap.ignorePatterns.push(`${baseUrl}${debugPluginRouteBasePath}/**`);
}
plugins.push(makePluginConfig('@docusaurus/plugin-sitemap', sitemap)); plugins.push(makePluginConfig('@docusaurus/plugin-sitemap', sitemap));
} }
if (Object.keys(rest).length > 0) { if (Object.keys(rest).length > 0) {

View file

@ -19,6 +19,7 @@
"commander": "^5.1.0", "commander": "^5.1.0",
"history": "^4.9.0", "history": "^4.9.0",
"joi": "^17.6.0", "joi": "^17.6.0",
"react-helmet-async": "^1.2.3",
"utility-types": "^3.10.0", "utility-types": "^3.10.0",
"webpack": "^5.72.0", "webpack": "^5.72.0",
"webpack-merge": "^5.8.0" "webpack-merge": "^5.8.0"

View file

@ -10,6 +10,7 @@ import type {CustomizeRuleString} from 'webpack-merge/dist/types';
import type {CommanderStatic} from 'commander'; import type {CommanderStatic} from 'commander';
import type {ParsedUrlQueryInput} from 'querystring'; import type {ParsedUrlQueryInput} from 'querystring';
import type Joi from 'joi'; import type Joi from 'joi';
import type {HelmetServerState} from 'react-helmet-async';
import type { import type {
DeepRequired, DeepRequired,
Required as RequireKeys, Required as RequireKeys,
@ -319,7 +320,12 @@ export type Plugin<Content = unknown> = {
actions: PluginContentLoadedActions; actions: PluginContentLoadedActions;
}) => Promise<void> | void; }) => Promise<void> | void;
routesLoaded?: (routes: RouteConfig[]) => void; // TODO remove soon, deprecated (alpha-60) routesLoaded?: (routes: RouteConfig[]) => void; // TODO remove soon, deprecated (alpha-60)
postBuild?: (props: Props & {content: Content}) => Promise<void> | void; postBuild?: (
props: Props & {
content: Content;
head: {[location: string]: HelmetServerState};
},
) => Promise<void> | void;
// TODO refactor the configureWebpack API surface: use an object instead of // TODO refactor the configureWebpack API surface: use an object instead of
// multiple params (requires breaking change) // multiple params (requires breaking change)
configureWebpack?: ( configureWebpack?: (

View file

@ -70,6 +70,7 @@ async function doRender(locals: Locals & {path: string}) {
preBodyTags, preBodyTags,
postBodyTags, postBodyTags,
onLinksCollected, onLinksCollected,
onHeadTagsCollected,
baseUrl, baseUrl,
ssrTemplate, ssrTemplate,
noIndex, noIndex,
@ -105,6 +106,7 @@ async function doRender(locals: Locals & {path: string}) {
helmet.link.toString(), helmet.link.toString(),
helmet.script.toString(), helmet.script.toString(),
]; ];
onHeadTagsCollected(location, helmet);
const metaAttributes = metaStrings.filter(Boolean); const metaAttributes = metaStrings.filter(Boolean);
const {generatedFilesDir} = locals; const {generatedFilesDir} = locals;

View file

@ -27,6 +27,7 @@ import {
import CleanWebpackPlugin from '../webpack/plugins/CleanWebpackPlugin'; import CleanWebpackPlugin from '../webpack/plugins/CleanWebpackPlugin';
import {loadI18n} from '../server/i18n'; import {loadI18n} from '../server/i18n';
import {mapAsyncSequential} from '@docusaurus/utils'; import {mapAsyncSequential} from '@docusaurus/utils';
import type {HelmetServerState} from 'react-helmet-async';
export async function build( export async function build(
siteDir: string, siteDir: string,
@ -149,12 +150,16 @@ async function buildLocale({
); );
const allCollectedLinks: {[location: string]: string[]} = {}; const allCollectedLinks: {[location: string]: string[]} = {};
const headTags: {[location: string]: HelmetServerState} = {};
let serverConfig: Configuration = await createServerConfig({ let serverConfig: Configuration = await createServerConfig({
props, props,
onLinksCollected: (staticPagePath, links) => { onLinksCollected: (staticPagePath, links) => {
allCollectedLinks[staticPagePath] = links; allCollectedLinks[staticPagePath] = links;
}, },
onHeadTagsCollected: (staticPagePath, tags) => {
headTags[staticPagePath] = tags;
},
}); });
if (staticDirectories.length > 0) { if (staticDirectories.length > 0) {
@ -224,7 +229,11 @@ async function buildLocale({
if (!plugin.postBuild) { if (!plugin.postBuild) {
return; return;
} }
await plugin.postBuild({...props, content: plugin.content}); await plugin.postBuild({
...props,
head: headTags,
content: plugin.content,
});
}), }),
); );

View file

@ -35,6 +35,8 @@ declare module 'react-loadable-ssr-addon-v5-slorber' {
} }
declare module '@slorber/static-site-generator-webpack-plugin' { declare module '@slorber/static-site-generator-webpack-plugin' {
import type {HelmetServerState} from 'react-helmet-async';
export type Locals = { export type Locals = {
routesLocation: {[filePath: string]: string}; routesLocation: {[filePath: string]: string};
generatedFilesDir: string; generatedFilesDir: string;
@ -42,6 +44,10 @@ declare module '@slorber/static-site-generator-webpack-plugin' {
preBodyTags: string; preBodyTags: string;
postBodyTags: string; postBodyTags: string;
onLinksCollected: (staticPagePath: string, links: string[]) => void; onLinksCollected: (staticPagePath: string, links: string[]) => void;
onHeadTagsCollected: (
staticPagePath: string,
tags: HelmetServerState,
) => void;
baseUrl: string; baseUrl: string;
ssrTemplate: string; ssrTemplate: string;
noIndex: boolean; noIndex: boolean;

View file

@ -17,14 +17,16 @@ import {NODE_MAJOR_VERSION, NODE_MINOR_VERSION} from '@docusaurus/utils';
import ssrDefaultTemplate from './templates/ssr.html.template'; import ssrDefaultTemplate from './templates/ssr.html.template';
// Forked for Docusaurus: https://github.com/slorber/static-site-generator-webpack-plugin // Forked for Docusaurus: https://github.com/slorber/static-site-generator-webpack-plugin
import StaticSiteGeneratorPlugin from '@slorber/static-site-generator-webpack-plugin'; import StaticSiteGeneratorPlugin, {
type Locals,
} from '@slorber/static-site-generator-webpack-plugin';
export default async function createServerConfig({ export default async function createServerConfig({
props, props,
onLinksCollected = () => {}, onLinksCollected,
}: { onHeadTagsCollected,
}: Pick<Locals, 'onLinksCollected' | 'onHeadTagsCollected'> & {
props: Props; props: Props;
onLinksCollected?: (staticPagePath: string, links: string[]) => void;
}): Promise<Configuration> { }): Promise<Configuration> {
const { const {
baseUrl, baseUrl,
@ -73,6 +75,7 @@ export default async function createServerConfig({
preBodyTags, preBodyTags,
postBodyTags, postBodyTags,
onLinksCollected, onLinksCollected,
onHeadTagsCollected,
ssrTemplate: ssrTemplate ?? ssrDefaultTemplate, ssrTemplate: ssrTemplate ?? ssrDefaultTemplate,
noIndex, noIndex,
}, },