mirror of
https://github.com/facebook/docusaurus.git
synced 2025-04-29 10:17:55 +02:00
fix(sitemap): filter all routes with robots meta containing noindex (#7964)
This commit is contained in:
parent
39883e70de
commit
a335a69982
4 changed files with 90 additions and 13 deletions
|
@ -158,7 +158,10 @@ describe('createSitemap', () => {
|
|||
meta: {
|
||||
// @ts-expect-error: bad lib def
|
||||
toComponent: () => [
|
||||
React.createElement('meta', {name: 'robots', content: 'noindex'}),
|
||||
React.createElement('meta', {
|
||||
name: 'robots',
|
||||
content: 'NoFolloW, NoiNDeX',
|
||||
}),
|
||||
],
|
||||
},
|
||||
},
|
||||
|
|
|
@ -13,6 +13,40 @@ import type {DocusaurusConfig} from '@docusaurus/types';
|
|||
import type {HelmetServerState} from 'react-helmet-async';
|
||||
import type {PluginOptions} from './options';
|
||||
|
||||
/**
 * Tells whether the page rendered for `route` carries a robots meta tag that
 * forbids indexing, in which case the route must be left out of the sitemap.
 *
 * @param head - Helmet server state collected per route location.
 * @param route - The route path to look up in `head`.
 * @returns `true` when a `<meta name="robots">` tag of that route contains a
 * `noindex` directive (or the equivalent `none` shorthand); `false` otherwise,
 * including when no head entry exists for the route.
 */
function isNoIndexMetaRoute({
  head,
  route,
}: {
  head: {[location: string]: HelmetServerState};
  route: string;
}): boolean {
  const isNoIndexMetaTag = ({
    name,
    content,
  }: {
    name?: string;
    content?: string;
  }): boolean => {
    if (!name || !content) {
      return false;
    }
    // meta name is not case-sensitive
    if (name.toLowerCase() !== 'robots') {
      return false;
    }
    // Robots directives are not case-sensitive
    const directives = content.toLowerCase();
    return (
      directives.includes('noindex') ||
      // `none` is shorthand for `noindex, nofollow`. It is compared against
      // whole comma-separated directives so that values such as
      // `max-image-preview:none` are not mistaken for it.
      directives.split(',').some((directive) => directive.trim() === 'none')
    );
  };

  // https://github.com/staylor/react-helmet-async/pull/167
  const meta = head[route]?.meta.toComponent() as unknown as
    | ReactElement<{name?: string; content?: string}>[]
    | undefined;

  // A route without any recorded head state has no robots restriction:
  // coalesce to `false` so callers always receive a real boolean.
  return (
    meta?.some((tag) =>
      isNoIndexMetaTag({name: tag.props.name, content: tag.props.content}),
    ) ?? false
  );
}
|
||||
|
||||
export default async function createSitemap(
|
||||
siteConfig: DocusaurusConfig,
|
||||
routesPaths: string[],
|
||||
|
@ -27,18 +61,15 @@ export default async function createSitemap(
|
|||
|
||||
const ignoreMatcher = createMatcher(ignorePatterns);
|
||||
|
||||
const includedRoutes = routesPaths.filter((route) => {
|
||||
if (route.endsWith('404.html') || ignoreMatcher(route)) {
|
||||
return false;
|
||||
}
|
||||
// https://github.com/staylor/react-helmet-async/pull/167
|
||||
const meta = head[route]?.meta.toComponent() as unknown as
|
||||
| ReactElement<{name?: string; content?: string}>[]
|
||||
| undefined;
|
||||
return !meta?.some(
|
||||
(tag) => tag.props.name === 'robots' && tag.props.content === 'noindex',
|
||||
function isRouteExcluded(route: string) {
|
||||
return (
|
||||
route.endsWith('404.html') ||
|
||||
ignoreMatcher(route) ||
|
||||
isNoIndexMetaRoute({head, route})
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
const includedRoutes = routesPaths.filter((route) => !isRouteExcluded(route));
|
||||
|
||||
if (includedRoutes.length === 0) {
|
||||
return null;
|
||||
|
|
|
@ -124,7 +124,11 @@ Read more about the robots file in [the Google documentation](https://developers
|
|||
|
||||
:::caution
|
||||
|
||||
**Important**: the `robots.txt` file does **not** prevent HTML pages from being indexed. Use `<meta name="robots" content="noindex">` as [page metadata](#single-page-metadata) to prevent it from appearing in search results entirely.
|
||||
**Important**: the `robots.txt` file does **not** prevent HTML pages from being indexed.
|
||||
|
||||
To prevent your whole Docusaurus site from being indexed, use the [`noIndex`](./api/docusaurus.config.js.md#noIndex) site config. Some [hosting providers](./deployment.mdx) may also let you configure an `X-Robots-Tag: noindex` HTTP header (GitHub Pages does not support this).
|
||||
|
||||
To prevent a single page from being indexed, use `<meta name="robots" content="noindex">` as [page metadata](#single-page-metadata). Read more about the [robots meta tag](https://developers.google.com/search/docs/advanced/robots/robots_meta_tag).
|
||||
|
||||
:::
|
||||
|
||||
|
@ -132,6 +136,20 @@ Read more about the robots file in [the Google documentation](https://developers
|
|||
|
||||
Docusaurus provides the [`@docusaurus/plugin-sitemap`](./api/plugins/plugin-sitemap.md) plugin, which is shipped with `preset-classic` by default. It autogenerates a `sitemap.xml` file which will be available at `https://example.com/[baseUrl]/sitemap.xml` after the production build. This sitemap metadata helps search engine crawlers crawl your site more accurately.
|
||||
|
||||
:::tip
|
||||
|
||||
The sitemap plugin automatically filters pages containing a `noindex` [robots meta directive](https://developers.google.com/search/docs/advanced/robots/robots_meta_tag).
|
||||
|
||||
For example, [`/examples/noIndex`](/examples/noIndex) is not included in the [Docusaurus sitemap.xml file](pathname:///sitemap.xml) because it contains the following [page metadata](#single-page-metadata):
|
||||
|
||||
```html
|
||||
<head>
|
||||
<meta name="robots" content="noindex, nofollow" />
|
||||
</head>
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
## Human readable links {#human-readable-links}
|
||||
|
||||
Docusaurus uses your file names as links, but you can always change that using slugs, see this [tutorial](./guides/docs/docs-introduction.md#document-id) for more details.
|
||||
|
|
25
website/src/pages/examples/noIndex.md
Normal file
25
website/src/pages/examples/noIndex.md
Normal file
|
@ -0,0 +1,25 @@
|
|||
# No Index Page example
|
||||
|
||||
<head>
|
||||
<meta name="robots" content="nOiNdeX, NoFolLoW" />
|
||||
</head>
|
||||
|
||||
This page will not be indexed by search engines because it contains the following [page metadata](/docs/seo#single-page-metadata) markup:
|
||||
|
||||
```html
|
||||
<head>
|
||||
<meta name="robots" content="noindex, nofollow" />
|
||||
</head>
|
||||
```
|
||||
|
||||
:::tip
|
||||
|
||||
The sitemap plugin filters pages containing a `noindex` content value. This page doesn't appear in the Docusaurus [sitemap.xml](pathname:///sitemap.xml) file.
|
||||
|
||||
:::
|
||||
|
||||
:::note
|
||||
|
||||
Robots directives are [case-insensitive](https://developers.google.com/search/docs/advanced/robots/robots_meta_tag#directives).
|
||||
|
||||
:::
|
Loading…
Add table
Reference in a new issue