feat(sitemap): add ignorePatterns option (#6979)

Co-authored-by: Joshua Chen <sidachen2003@gmail.com>
This commit is contained in:
ApsarasX 2022-04-06 21:44:07 +08:00 committed by GitHub
parent bd70cfc1d7
commit 103ea04661
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 78 additions and 14 deletions

View file

@ -19,6 +19,7 @@ describe('createSitemap', () => {
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
ignorePatterns: [],
},
);
expect(sitemap).toContain(
@ -42,11 +43,34 @@ describe('createSitemap', () => {
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
ignorePatterns: [],
},
);
expect(sitemap).not.toContain('404');
});
// Checks that routes matching `ignorePatterns` are left out of the generated
// sitemap, while non-matching sibling routes are kept.
it('excludes patterns configured to be ignored', async () => {
const sitemap = await createSitemap(
{
url: 'https://example.com',
} as DocusaurusConfig,
['/', '/search/', '/tags/', '/search/foo', '/tags/foo/bar'],
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
ignorePatterns: [
// Shallow ignore: expected to remove only the `/search/` route itself
'/search/',
// Deep ignore: expected to remove `/tags/` and every route below it
'/tags/**',
],
},
);
// The `</loc>` suffix pins the exact `/search/` entry, so this assertion is
// not tripped by the `/search/foo` entry that must remain.
expect(sitemap).not.toContain('/search/</loc>');
// The shallow pattern must leave nested routes in place.
expect(sitemap).toContain('/search/foo');
// No `/tags` route of any depth may survive the deep pattern.
expect(sitemap).not.toContain('/tags');
});
it('keep trailing slash unchanged', async () => {
const sitemap = await createSitemap(
{
@ -57,6 +81,7 @@ describe('createSitemap', () => {
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
ignorePatterns: [],
},
);
@ -76,6 +101,7 @@ describe('createSitemap', () => {
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
ignorePatterns: [],
},
);
@ -95,6 +121,7 @@ describe('createSitemap', () => {
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
ignorePatterns: [],
},
);

View file

@ -27,6 +27,7 @@ describe('validateOptions', () => {
const userOptions = {
changefreq: 'yearly',
priority: 0.9,
ignorePatterns: ['/search/**'],
};
expect(testValidate(userOptions)).toEqual({
...defaultOptions,
@ -49,4 +50,17 @@ describe('validateOptions', () => {
`"\\"changefreq\\" must be one of [daily, monthly, always, hourly, weekly, yearly, never]"`,
);
});
// Checks the validation errors raised for malformed `ignorePatterns` values.
it('rejects bad ignorePatterns inputs', () => {
// A single string is not accepted; the option has to be an array.
expect(() =>
testValidate({ignorePatterns: '/search'}),
).toThrowErrorMatchingInlineSnapshot(
`"\\"ignorePatterns\\" must be an array"`,
);
// Array items have to be strings; a RegExp entry is rejected.
expect(() =>
testValidate({ignorePatterns: [/^\/search/]}),
).toThrowErrorMatchingInlineSnapshot(
`"\\"ignorePatterns[0]\\" must be a string"`,
);
});
});

View file

@ -6,25 +6,28 @@
*/
import {SitemapStream, streamToPromise} from 'sitemap';
import type {Options} from '@docusaurus/plugin-sitemap';
import type {PluginOptions} from '@docusaurus/plugin-sitemap';
import type {DocusaurusConfig} from '@docusaurus/types';
import {applyTrailingSlash} from '@docusaurus/utils-common';
import {createMatcher} from '@docusaurus/utils';
export default async function createSitemap(
siteConfig: DocusaurusConfig,
routesPaths: string[],
options: Options,
options: PluginOptions,
): Promise<string> {
const {url: hostname} = siteConfig;
if (!hostname) {
throw new Error('URL in docusaurus.config.js cannot be empty/undefined.');
}
const {changefreq, priority} = options;
const {changefreq, priority, ignorePatterns} = options;
const ignoreMatcher = createMatcher(ignorePatterns);
const sitemapStream = new SitemapStream({hostname});
routesPaths
.filter((route) => !route.endsWith('404.html'))
.filter((route) => !route.endsWith('404.html') && !ignoreMatcher(route))
.forEach((routePath) =>
sitemapStream.write({
url: applyTrailingSlash(routePath, {

View file

@ -7,13 +7,13 @@
import fs from 'fs-extra';
import path from 'path';
import type {Options} from '@docusaurus/plugin-sitemap';
import type {PluginOptions} from '@docusaurus/plugin-sitemap';
import createSitemap from './createSitemap';
import type {LoadContext, Plugin} from '@docusaurus/types';
export default function pluginSitemap(
context: LoadContext,
options: Options,
options: PluginOptions,
): Plugin<void> {
return {
name: 'docusaurus-plugin-sitemap',

View file

@ -7,12 +7,13 @@
import {Joi} from '@docusaurus/utils-validation';
import {EnumChangefreq} from 'sitemap';
import type {Options} from '@docusaurus/plugin-sitemap';
import type {Options, PluginOptions} from '@docusaurus/plugin-sitemap';
import type {OptionValidationContext} from '@docusaurus/types';
/**
* Fallback values for the plugin options; the validation schema uses them as
* the default for each field the user leaves out.
*/
export const DEFAULT_OPTIONS: Options = {
export const DEFAULT_OPTIONS: PluginOptions = {
changefreq: EnumChangefreq.WEEKLY,
priority: 0.5,
// By default no route is filtered out of the sitemap.
ignorePatterns: [],
};
const PluginOptionSchema = Joi.object({
@ -24,6 +25,9 @@ const PluginOptionSchema = Joi.object({
.valid(...Object.values(EnumChangefreq))
.default(DEFAULT_OPTIONS.changefreq),
priority: Joi.number().min(0).max(1).default(DEFAULT_OPTIONS.priority),
ignorePatterns: Joi.array()
.items(Joi.string())
.default(DEFAULT_OPTIONS.ignorePatterns),
trailingSlash: Joi.forbidden().messages({
'any.unknown':
'Please use the new Docusaurus global trailingSlash config instead, and the sitemaps plugin will use it.',
@ -33,7 +37,7 @@ const PluginOptionSchema = Joi.object({
/**
* Checks the user-supplied plugin options against `PluginOptionSchema` using
* the `validate` helper provided by the validation context.
*
* @returns The value produced by `validate` for the given options.
*/
export function validateOptions({
validate,
options,
}: OptionValidationContext<Options, Options>): Options {
}: OptionValidationContext<Options, PluginOptions>): PluginOptions {
const validatedOptions = validate(PluginOptionSchema, options);
return validatedOptions;
}

View file

@ -7,10 +7,16 @@
import type {EnumChangefreq} from 'sitemap';
export type Options = {
id?: string;
export type PluginOptions = {
/** @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions */
changefreq?: EnumChangefreq;
changefreq: EnumChangefreq;
/** @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions */
priority?: number;
priority: number;
/**
* A list of glob patterns matched against route paths; every route that
* matches at least one pattern is left out of the sitemap. An empty list
* filters nothing. Note that you may need to include the base URL in the
* patterns.
*/
ignorePatterns: string[];
};
/** The same fields as `PluginOptions`, with every one of them optional. */
export type Options = Partial<PluginOptions>;

View file

@ -31,11 +31,16 @@ type Matcher = (str: string) => boolean;
* A very thin wrapper around `Micromatch.makeRe`.
*
* @see {@link createAbsoluteFilePathMatcher}
* @param patterns A list of glob patterns.
* @param patterns A list of glob patterns. If the list is empty, it defaults to
* matching none.
* @returns A matcher handle that tells if a file path is matched by any of the
* patterns.
*/
export function createMatcher(patterns: string[]): Matcher {
if (patterns.length === 0) {
// `/(?:)/.test("foo")` is `true`
return () => false;
}
const regexp = new RegExp(
patterns.map((pattern) => Micromatch.makeRe(pattern).source).join('|'),
);

View file

@ -39,6 +39,7 @@ Accepted fields:
| --- | --- | --- | --- |
| `changefreq` | `string` | `'weekly'` | See [sitemap docs](https://www.sitemaps.org/protocol.html#xmlTagDefinitions) |
| `priority` | `number` | `0.5` | See [sitemap docs](https://www.sitemaps.org/protocol.html#xmlTagDefinitions) |
| `ignorePatterns` | `string[]` | `[]` | A list of glob patterns; route paths matching any of them are excluded from the sitemap. Note that you may need to include the base URL in the patterns. |
</APITable>
@ -68,6 +69,7 @@ Most Docusaurus users configure this plugin through the preset options.
const config = {
changefreq: 'weekly',
priority: 0.5,
ignorePatterns: ['/tags/**'],
};
```

View file

@ -338,6 +338,9 @@ const config = {
trackingID: 'UA-141789564-1',
}
: undefined,
sitemap: {
ignorePatterns: ['/tests/**'],
},
}),
],
],