fix(content-blog): generate feed by reading build output (#6454)

This commit is contained in:
Joshua Chen 2022-01-26 23:54:15 +08:00 committed by GitHub
parent ebd5340205
commit 76a8d5f38a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
28 changed files with 364 additions and 249 deletions

View file

@ -22,7 +22,9 @@
"@docusaurus/logger": "2.0.0-beta.14",
"@docusaurus/mdx-loader": "2.0.0-beta.14",
"@docusaurus/utils": "2.0.0-beta.14",
"@docusaurus/utils-common": "2.0.0-beta.14",
"@docusaurus/utils-validation": "2.0.0-beta.14",
"cheerio": "^1.0.0-rc.10",
"feed": "^4.2.2",
"fs-extra": "^10.0.0",
"lodash": "^4.17.20",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -44,10 +44,10 @@ async function testGenerateFeeds(
);
await createBlogFeedFiles({
blogPosts,
blogPosts: blogPosts.filter((post) => !post.metadata.frontMatter.draft),
options,
siteConfig: context.siteConfig,
outDir: 'build',
outDir: context.outDir,
});
}
@ -64,12 +64,14 @@ describe('blogFeed', () => {
url: 'https://docusaurus.io',
favicon: 'image/favicon.ico',
};
const outDir = path.join(siteDir, 'build-snap');
await testGenerateFeeds(
{
siteDir,
siteConfig,
i18n: DefaultI18N,
outDir,
} as LoadContext,
{
path: 'invalid-blog-path',
@ -92,7 +94,7 @@ describe('blogFeed', () => {
test('shows feed item for each post', async () => {
const siteDir = path.join(__dirname, '__fixtures__', 'website');
const generatedFilesDir = path.resolve(siteDir, '.docusaurus');
const outDir = path.join(siteDir, 'build-snap');
const siteConfig = {
title: 'Hello',
baseUrl: '/myBaseUrl/',
@ -100,12 +102,14 @@ describe('blogFeed', () => {
favicon: 'image/favicon.ico',
};
// Build is quite difficult to mock, so we built the blog beforehand and
// copied the output to the fixture...
await testGenerateFeeds(
{
siteDir,
siteConfig,
generatedFilesDir,
i18n: DefaultI18N,
outDir,
} as LoadContext,
{
path: 'blog',
@ -123,7 +127,7 @@ describe('blogFeed', () => {
} as PluginOptions,
);
expect(fsMock.mock.calls).toMatchSnapshot();
expect(fsMock.mock.calls.map((call) => call[1])).toMatchSnapshot();
fsMock.mockClear();
});
});

View file

@ -7,7 +7,13 @@
import {Feed, type Author as FeedAuthor, type Item as FeedItem} from 'feed';
import type {BlogPost} from './types';
import {normalizeUrl, mdxToHtml, posixPath} from '@docusaurus/utils';
import {
normalizeUrl,
posixPath,
mapAsyncSequential,
readOutputHTMLFile,
} from '@docusaurus/utils';
import cheerio from 'cheerio';
import type {DocusaurusConfig} from '@docusaurus/types';
import path from 'path';
import fs from 'fs-extra';
@ -16,28 +22,18 @@ import type {
PluginOptions,
Author,
} from '@docusaurus/plugin-content-blog';
// TODO temporary helper, to be replaced once mdxToHtml is handled better.
// JSX and require calls are hard to convert reliably into HTML feed content.
// See https://github.com/facebook/docusaurus/issues/5664
function mdxToFeedContent(mdxContent: string): string | undefined {
  let feedContent: string | undefined;
  try {
    feedContent = mdxToHtml(mdxContent);
  } catch (e) {
    // TODO a plugin option may be needed to configure how this error is handled.
    // The error is swallowed on purpose for now, until the problem space is
    // better understood: the feed item simply gets no content.
    feedContent = undefined;
  }
  return feedContent;
}
import {blogPostContainerID} from '@docusaurus/utils-common';
async function generateBlogFeed({
blogPosts,
options,
siteConfig,
outDir,
}: {
blogPosts: BlogPost[];
options: PluginOptions;
siteConfig: DocusaurusConfig;
outDir: string;
}): Promise<Feed | null> {
if (!blogPosts.length) {
return null;
@ -66,7 +62,7 @@ async function generateBlogFeed({
return {name: author.name, link: author.url};
}
blogPosts.forEach((post) => {
await mapAsyncSequential(blogPosts, async (post) => {
const {
id,
metadata: {
@ -79,6 +75,13 @@ async function generateBlogFeed({
},
} = post;
const content = await readOutputHTMLFile(
permalink.replace(siteConfig.baseUrl, ''),
outDir,
siteConfig.trailingSlash,
);
const $ = cheerio.load(content);
const feedItem: FeedItem = {
title: metadataTitle,
id,
@ -87,7 +90,7 @@ async function generateBlogFeed({
description,
// Atom feed demands the "term", while other feeds use "name"
category: tags.map((tag) => ({name: tag.label, term: tag.label})),
content: mdxToFeedContent(post.content),
content: $(`#${blogPostContainerID}`).html()!,
};
// json1() method takes the first item of authors array
@ -145,7 +148,12 @@ export async function createBlogFeedFiles({
siteConfig: DocusaurusConfig;
outDir: string;
}): Promise<void> {
const feed = await generateBlogFeed({blogPosts, options, siteConfig});
const feed = await generateBlogFeed({
blogPosts,
options,
siteConfig,
outDir,
});
const feedTypes = options.feedOptions.type;
if (!feed || !feedTypes) {

View file

@ -36,7 +36,6 @@ import {PluginOptionSchema} from './pluginOptionSchema';
import type {
LoadContext,
ConfigureWebpackUtils,
Props,
Plugin,
HtmlTags,
OptionValidationContext,
@ -512,14 +511,11 @@ export default async function pluginContentBlog(
};
},
async postBuild({outDir}: Props) {
async postBuild({outDir, content}) {
if (!options.feedOptions.type) {
return;
}
// TODO: we shouldn't need to re-read the posts here!
// postBuild should receive loadedContent
const blogPosts = await generateBlogPosts(contentPaths, context, options);
const {blogPosts} = content;
if (!blogPosts.length) {
return;
}

View file

@ -28,6 +28,7 @@
"@docusaurus/theme-common": "2.0.0-beta.14",
"@docusaurus/theme-translations": "2.0.0-beta.14",
"@docusaurus/utils": "2.0.0-beta.14",
"@docusaurus/utils-common": "2.0.0-beta.14",
"@docusaurus/utils-validation": "2.0.0-beta.14",
"@mdx-js/react": "^1.6.21",
"clsx": "^1.1.1",

View file

@ -12,6 +12,7 @@ import Translate, {translate} from '@docusaurus/Translate';
import Link from '@docusaurus/Link';
import {useBaseUrlUtils} from '@docusaurus/useBaseUrl';
import {usePluralForm} from '@docusaurus/theme-common';
import {blogPostContainerID} from '@docusaurus/utils-common';
import MDXComponents from '@theme/MDXComponents';
import EditThisPage from '@theme/EditThisPage';
import type {Props} from '@theme/BlogPostItem';
@ -102,7 +103,11 @@ function BlogPostItem(props: Props): JSX.Element {
<meta itemProp="image" content={withBaseUrl(image, {absolute: true})} />
)}
<div className="markdown" itemProp="articleBody">
<div
// This ID is used for the feed generation to locate the main content
id={isBlogPostPage ? blogPostContainerID : undefined}
className="markdown"
itemProp="articleBody">
<MDXProvider components={MDXComponents}>{children}</MDXProvider>
</div>

View file

@ -258,7 +258,7 @@ export interface Plugin<Content = unknown> {
actions: PluginContentLoadedActions;
}) => Promise<void>;
routesLoaded?: (routes: RouteConfig[]) => void; // TODO remove soon, deprecated (alpha-60)
postBuild?: (props: Props) => void;
postBuild?: (props: Props & {content: Content}) => Promise<void>;
postStart?: (props: Props) => void;
// TODO refactor the configureWebpack API surface: use an object instead of multiple params (requires breaking change)
configureWebpack?: (

View file

@ -5,5 +5,6 @@
* LICENSE file in the root directory of this source tree.
*/
/**
 * DOM id of the element that holds a blog post's rendered content. The blog
 * post page sets this id on the content container, and the feed generation
 * selects the element by this id in the built HTML to extract the content.
 */
export const blogPostContainerID = 'post-content';
export {default as applyTrailingSlash} from './applyTrailingSlash';
export type {ApplyTrailingSlashParams} from './applyTrailingSlash';

View file

@ -1,133 +0,0 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {mdxToHtml} from '../mdxUtils';
// Snapshot tests for mdxToHtml: each case feeds an MDX string to the function
// and pins the resulting HTML string with an inline snapshot.
describe('mdxToHtml', () => {
// Plain Markdown (headings, emphasis, blockquote) is rendered to HTML.
test('work with simple markdown', () => {
const mdxString = `
# title
title text **bold**
## subtitle
subtitle text *italic*
> Quote
`;
expect(mdxToHtml(mdxString)).toMatchInlineSnapshot(
`"<h1>title</h1><p>title text <strong>bold</strong></p><h2>subtitle</h2><p>subtitle text <em>italic</em></p><blockquote><p>Quote</p></blockquote>"`,
);
});
// Import statements do not appear in the rendered output.
test('work with MDX imports', () => {
const mdxString = `
# title
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
text
`;
expect(mdxToHtml(mdxString)).toMatchInlineSnapshot(
`"<h1>title</h1><p>text</p>"`,
);
});
// Export statements do not appear in the rendered output.
test('work with MDX exports', () => {
const mdxString = `
# title
export const someExport = 42
export const MyLocalComponent = () => "result"
export const toc = [
{id: "title",label: "title"}
]
text
`;
expect(mdxToHtml(mdxString)).toMatchInlineSnapshot(
`"<h1>title</h1><p>text</p>"`,
);
});
// JSX components that are not in scope are rendered as plain <div> elements
// carrying the original props as attributes (see the snapshot below).
test('work with MDX Tabs', () => {
const mdxString = `
# title
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
<Tabs>
<TabItem value="apple" label="Apple">
This is an apple 🍎
</TabItem>
<TabItem value="orange" label="Orange">
This is an orange 🍊
</TabItem>
</Tabs>
text
`;
// TODO this is not an ideal behavior!
// There is a warning "Component TabItem was not imported, exported, or provided by MDXProvider as global scope"
// Theme + MDX config should provide a list of React components to put in MDX scope
expect(mdxToHtml(mdxString)).toMatchInlineSnapshot(
`"<h1>title</h1><div><div value=\\"apple\\" label=\\"Apple\\">This is an apple 🍎</div><div value=\\"orange\\" label=\\"Orange\\">This is an orange 🍊</div></div><p>text</p>"`,
);
});
// A ```mdx-code-block fence is currently rendered as an escaped code block
// rather than being evaluated as MDX (known limitation pinned by the snapshot).
test('work with MDX Tabs with ```mdx-code-block', () => {
const mdxString = `
# title
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
\`\`\`mdx-code-block
<Tabs>
<TabItem value="apple" label="Apple">
This is an apple 🍎
</TabItem>
<TabItem value="orange" label="Orange">
This is an orange 🍊
</TabItem>
</Tabs>
\`\`\`
text
`;
// TODO bad behavior!
// ```mdx-code-block should be unwrapped and inner MDX content should be evaluated
expect(mdxToHtml(mdxString)).toMatchInlineSnapshot(`
"<h1>title</h1><pre><code class=\\"language-mdx-code-block\\">&lt;Tabs&gt;
&lt;TabItem value=&quot;apple&quot; label=&quot;Apple&quot;&gt;
This is an apple 🍎
&lt;/TabItem&gt;
&lt;TabItem value=&quot;orange&quot; label=&quot;Orange&quot;&gt;
This is an orange 🍊
&lt;/TabItem&gt;
&lt;/Tabs&gt;
</code></pre><p>text</p>"
`);
});
});

View file

@ -23,7 +23,6 @@ import {simpleHash, docuHash} from './hashUtils';
import {DEFAULT_PLUGIN_ID} from './constants';
export * from './constants';
export * from './mdxUtils';
export * from './urlUtils';
export * from './tags';
export * from './markdownParser';
@ -210,6 +209,39 @@ export function getPluginI18nPath({
);
}
/**
 * Reads the HTML file emitted by the build for a given permalink.
 *
 * Two layouts are possible for a permalink `foo`: `foo/index.html` and
 * `foo.html`. Which one is read depends on `trailingSlash`.
 *
 * @param permalink The URL that the HTML file corresponds to, without base URL
 * @param outDir Full path to the output directory
 * @param trailingSlash The site config option. If `true`, only
 * `<permalink>/index.html` is read; if `false`, only `<permalink>.html` is
 * read; if `undefined`, both locations are probed in that order.
 * @returns A buffer, which you have to decode into a string yourself if
 * needed. (Not always necessary since the output isn't for human consumption
 * anyway, and most HTML manipulation libs accept buffers.)
 * @throws If `trailingSlash` is `undefined` and neither candidate file exists,
 * or if reading the selected file fails.
 */
export async function readOutputHTMLFile(
  permalink: string,
  outDir: string,
  trailingSlash: boolean | undefined,
): Promise<Buffer> {
  const withTrailingSlashPath = path.join(outDir, permalink, 'index.html');
  // Strip a trailing slash before appending the extension: otherwise a
  // permalink such as `blog/post/` would resolve to `blog/post/.html`
  // instead of `blog/post.html`.
  const withoutTrailingSlashPath = path.join(
    outDir,
    `${permalink.replace(/\/$/, '')}.html`,
  );
  if (trailingSlash) {
    return fs.readFile(withTrailingSlashPath);
  } else if (trailingSlash === false) {
    return fs.readFile(withoutTrailingSlashPath);
  }
  // The option is unset: either layout may have been emitted, so probe both.
  const HTMLPath = await findAsyncSequential(
    [withTrailingSlashPath, withoutTrailingSlashPath],
    fs.pathExists,
  );
  if (!HTMLPath) {
    // Report both probed locations so the failure is diagnosable.
    throw new Error(
      `Expected output HTML file to be found at ${withTrailingSlashPath} or ${withoutTrailingSlashPath}`,
    );
  }
  return fs.readFile(HTMLPath);
}
export async function mapAsyncSequential<T, R>(
array: T[],
action: (t: T) => Promise<R>,

View file

@ -1,32 +0,0 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import React from 'react';
import ReactDOMServer from 'react-dom/server';
import MDX from '@mdx-js/runtime';
import removeImports from 'remark-mdx-remove-imports';
import removeExports from 'remark-mdx-remove-exports';
/**
 * Renders an MDX string to a plain HTML string, with import/export nodes
 * removed. Originally written to turn MDX blog posts into HTML for the RSS
 * feed.
 *
 * TODO not an ideal implementation, it won't work well with MDX elements!
 * TODO theme+global site config should be able to declare the MDX components
 * in scope for rendering the RSS feeds
 * see also https://github.com/facebook/docusaurus/issues/4625
 */
export function mdxToHtml(
  mdxStr: string,
  // TODO allow providing components/scope here, see https://github.com/mdx-js/mdx/tree/v1.6.13/packages/runtime
): string {
  // Remark plugins that remove import and export nodes before rendering.
  const remarkPlugins = [removeImports, removeExports];
  const mdxElement = React.createElement(MDX, {remarkPlugins}, [mdxStr]);
  return ReactDOMServer.renderToString(mdxElement);
}

View file

@ -176,7 +176,7 @@ async function buildLocale({
if (configureWebpack) {
clientConfig = applyConfigureWebpack(
configureWebpack.bind(plugin), // The plugin lifecycle may reference `this`. // TODO remove this implicit api: inject in callback instead
configureWebpack.bind(plugin), // The plugin lifecycle may reference `this`.
clientConfig,
false,
props.siteConfig.webpack?.jsLoader,
@ -184,7 +184,7 @@ async function buildLocale({
);
serverConfig = applyConfigureWebpack(
configureWebpack.bind(plugin), // The plugin lifecycle may reference `this`. // TODO remove this implicit api: inject in callback instead
configureWebpack.bind(plugin), // The plugin lifecycle may reference `this`.
serverConfig,
true,
props.siteConfig.webpack?.jsLoader,
@ -220,7 +220,8 @@ async function buildLocale({
if (!plugin.postBuild) {
return;
}
await plugin.postBuild(props);
// The plugin lifecycle may reference `this`: call postBuild as a method of `plugin` so it stays bound.
await plugin.postBuild({...props, content: plugin.content});
}),
);

View file

@ -281,6 +281,7 @@ interface Props {
postBodyTags: string;
routesPaths: string[];
plugins: Plugin<any>[];
content: Content;
}
```

View file

@ -6047,6 +6047,17 @@ chardet@^0.7.0:
resolved "https://registry.yarnpkg.com/chardet/-/chardet-0.7.0.tgz#90094849f0937f2eedc2425d0d28a9e5f0cbad9e"
integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==
cheerio-select@^1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-1.5.0.tgz#faf3daeb31b17c5e1a9dabcee288aaf8aafa5823"
integrity sha512-qocaHPv5ypefh6YNxvnbABM07KMxExbtbfuJoIie3iZXX1ERwYmJcIiRrr9H05ucQP1k28dav8rpdDgjQd8drg==
dependencies:
css-select "^4.1.3"
css-what "^5.0.1"
domelementtype "^2.2.0"
domhandler "^4.2.0"
domutils "^2.7.0"
cheerio@^0.22.0:
version "0.22.0"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-0.22.0.tgz#a9baa860a3f9b595a6b81b1a86873121ed3a269e"
@ -6069,6 +6080,19 @@ cheerio@^0.22.0:
lodash.reject "^4.4.0"
lodash.some "^4.4.0"
cheerio@^1.0.0-rc.10:
version "1.0.0-rc.10"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.10.tgz#2ba3dcdfcc26e7956fc1f440e61d51c643379f3e"
integrity sha512-g0J0q/O6mW8z5zxQ3A8E8J1hUgp4SMOvEoW/x84OwyHKe/Zccz83PVT4y5Crcr530FV6NgmKI1qvGTKVl9XXVw==
dependencies:
cheerio-select "^1.5.0"
dom-serializer "^1.3.2"
domhandler "^4.2.0"
htmlparser2 "^6.1.0"
parse5 "^6.0.1"
parse5-htmlparser2-tree-adapter "^6.0.1"
tslib "^2.2.0"
chokidar@^3.0.2, chokidar@^3.4.0, chokidar@^3.4.2, chokidar@^3.5.2:
version "3.5.2"
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.2.tgz#dba3976fcadb016f66fd365021d91600d01c1e75"
@ -7117,7 +7141,7 @@ css-what@2.1:
resolved "https://registry.yarnpkg.com/css-what/-/css-what-2.1.3.tgz#a6d7604573365fe74686c3f311c56513d88285f2"
integrity sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==
css-what@^5.1.0:
css-what@^5.0.1, css-what@^5.1.0:
version "5.1.0"
resolved "https://registry.yarnpkg.com/css-what/-/css-what-5.1.0.tgz#3f7b707aadf633baf62c2ceb8579b545bb40f7fe"
integrity sha512-arSMRWIIFY0hV8pIxZMEfmMI47Wj3R/aWpZDDxWYCPEiOMv6tfOrnpDtgxBYPEQD4V0Y/958+1TdC3iWTFcUPw==
@ -7757,7 +7781,7 @@ dom-serializer@0:
domelementtype "^2.0.1"
entities "^2.0.0"
dom-serializer@^1.0.1:
dom-serializer@^1.0.1, dom-serializer@^1.3.2:
version "1.3.2"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-1.3.2.tgz#6206437d32ceefaec7161803230c7a20bc1b4d91"
integrity sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig==
@ -7826,7 +7850,7 @@ domutils@^1.5.1:
dom-serializer "0"
domelementtype "1"
domutils@^2.5.2, domutils@^2.8.0:
domutils@^2.5.2, domutils@^2.7.0, domutils@^2.8.0:
version "2.8.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.8.0.tgz#4437def5db6e2d1f5d6ee859bd95ca7d02048135"
integrity sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==
@ -14544,7 +14568,7 @@ parse-url@^6.0.0:
parse-path "^4.0.0"
protocols "^1.4.0"
parse5-htmlparser2-tree-adapter@^6.0.0:
parse5-htmlparser2-tree-adapter@^6.0.0, parse5-htmlparser2-tree-adapter@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz#2cdf9ad823321140370d4dbf5d3e92c7c8ddc6e6"
integrity sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==
@ -18450,7 +18474,7 @@ tslib@^1.8.1, tslib@^1.9.0:
resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
tslib@^2.0.1, tslib@^2.0.3, tslib@^2.1.0, tslib@^2.3.1:
tslib@^2.0.1, tslib@^2.0.3, tslib@^2.1.0, tslib@^2.2.0, tslib@^2.3.1:
version "2.3.1"
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.3.1.tgz#e8a335add5ceae51aa261d32a490158ef042ef01"
integrity sha512-77EbyPPpMz+FRFRuAFlWMtmgUWGe9UOG2Z25NqCwiIjRhOf5iKGuzSe5P2w1laq+FkRy4p+PCuVkJSGkzTEKVw==